Aluno: Felipe Martim Fernandes Vieira

1 Introdução

O objetivo desse trabalho é fazer a análise exploratória de uma base de dados de vinhos de uma região de Portugal, e construir modelos utilizando as técnicas aprendidas em sala de aula. As diferentes etapas do trabalho seguem abaixo:

  1. Estimar a variável “quality” em função das características físico-químicas dos vinhos através da construção dos modelos preditivos listados.
    • Regressão Linear
    • Árvore de Regressão
  2. Categorização dos vinhos em “bons” ou “ruins”, sendo que os vinhos com notas maiores ou iguais a 6 serão considerados de boa qualidade.
    • Regressão Logística
    • Árvore de Decisão
  3. Definir grupos de vinhos utilizando métodos de clusterização
    • Hierárquica
    • K-Means
    • Análise de Componentes Principais

2 Preparações

2.1 Carregando as bibliotecas necessárias

library(dplyr)
library(corrgram)
library('GGally')
library(plotly)
library(caret)
library(e1071)
library(lmtest)
library(DAAG)
library(rpart)
library(rpart.plot)
library(rattle)
library(tclust)
library(cluster)
library(fpc)
library(ROCR)
Loading required package: gplots

Attaching package: ‘gplots’

The following object is masked from ‘package:stats’:

    lowess

2.2 Funções auxiliares

2.2.1 Multiplot

# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

2.3 Carregando os dados

setwd('~/workspace/fiap/wine')

wines <- read.csv2(file="BaseWine_Red_e_White.csv"
                   , header=TRUE
                   , sep=";")

3 Visão geral dos dados

str(wines)
'data.frame':   6497 obs. of  14 variables:
 $ id_vinho          : int  1 2 3 4 5 6 7 8 9 10 ...
 $ fixedacidity      : num  6.6 6.7 10.6 5.4 6.7 6.8 6.6 7.2 5.1 6.2 ...
 $ volatileacidity   : num  0.24 0.34 0.31 0.18 0.3 0.5 0.61 0.66 0.26 0.22 ...
 $ citricacid        : num  0.35 0.43 0.49 0.24 0.44 0.11 0 0.33 0.33 0.2 ...
 $ residualsugar     : num  7.7 1.6 2.2 4.8 18.8 ...
 $ chlorides         : num  0.031 0.041 0.063 0.041 0.057 0.075 0.069 0.068 0.027 0.035 ...
 $ freesulfurdioxide : num  36 29 18 30 65 16 4 34 46 58 ...
 $ totalsulfurdioxide: num  135 114 40 113 224 49 8 102 113 184 ...
 $ density           : num  0.994 0.99 0.998 0.994 1 ...
 $ pH                : num  3.19 3.23 3.14 3.42 3.11 3.36 3.33 3.27 3.35 3.11 ...
 $ sulphates         : num  0.37 0.44 0.51 0.4 0.53 0.79 0.37 0.78 0.43 0.53 ...
 $ alcohol           : num  10.5 12.6 9.8 9.4 9.1 9.5 10.4 12.8 11.4 9 ...
 $ quality           : int  5 6 6 6 5 5 4 6 7 6 ...
 $ Vinho             : Factor w/ 2 levels "RED","WHITE": 2 2 1 2 2 1 1 1 2 2 ...
summary(wines)
    id_vinho     fixedacidity    volatileacidity    citricacid     residualsugar     chlorides       freesulfurdioxide
 Min.   :   1   Min.   : 3.800   Min.   :0.0800   Min.   :0.0000   Min.   : 0.60   Min.   :0.00900   Min.   :  1.00   
 1st Qu.:1625   1st Qu.: 6.400   1st Qu.:0.2300   1st Qu.:0.2500   1st Qu.: 1.80   1st Qu.:0.03800   1st Qu.: 17.00   
 Median :3249   Median : 7.000   Median :0.2900   Median :0.3100   Median : 3.00   Median :0.04700   Median : 29.00   
 Mean   :3249   Mean   : 7.215   Mean   :0.3397   Mean   :0.3186   Mean   : 5.44   Mean   :0.05603   Mean   : 30.53   
 3rd Qu.:4873   3rd Qu.: 7.700   3rd Qu.:0.4000   3rd Qu.:0.3900   3rd Qu.: 8.10   3rd Qu.:0.06500   3rd Qu.: 41.00   
 Max.   :6497   Max.   :15.900   Max.   :1.5800   Max.   :1.6600   Max.   :45.80   Max.   :0.61100   Max.   :289.00   
 totalsulfurdioxide    density             pH          sulphates         alcohol           quality        Vinho     
 Min.   :  6.0      Min.   :0.9871   Min.   :2.720   Min.   :0.2200   Min.   : 0.9567   Min.   :3.000   RED  :1599  
 1st Qu.: 77.0      1st Qu.:0.9923   1st Qu.:3.110   1st Qu.:0.4300   1st Qu.: 9.5000   1st Qu.:5.000   WHITE:4898  
 Median :118.0      Median :0.9949   Median :3.210   Median :0.5100   Median :10.3000   Median :6.000               
 Mean   :115.7      Mean   :0.9947   Mean   :3.219   Mean   :0.5313   Mean   :10.4862   Mean   :5.818               
 3rd Qu.:156.0      3rd Qu.:0.9970   3rd Qu.:3.320   3rd Qu.:0.6000   3rd Qu.:11.3000   3rd Qu.:6.000               
 Max.   :440.0      Max.   :1.0140   Max.   :4.010   Max.   :2.0000   Max.   :14.9000   Max.   :9.000               

Verificando se há valores faltantes

sum(is.na(wines))
[1] 0

3.1 Renomeando e removendo variáveis

A variável id_vinho não é necessária para a nossa análise e será removida. Para manter a consistência na nomenclatura, renomeei Vinho para type.

wines_adjusted <- wines %>% select(-id_vinho) %>% rename(type = Vinho)

4 Visualizando as características individualmente

p1 <- wines_adjusted %>% ggplot(aes(x = chlorides)) + 
  geom_histogram(bins = 40, fill = 'lightblue')

p2 <- wines_adjusted %>% ggplot(aes(x = sulphates)) + 
  geom_histogram(bins = 40, fill = 'lightblue') 

p3 <- wines_adjusted %>% ggplot(aes(x = fixedacidity)) + 
  geom_histogram(bins = 40, fill = 'lightblue') 

p4 <- wines_adjusted %>% ggplot(aes(x = freesulfurdioxide)) + 
  geom_histogram(bins = 40, fill = 'lightblue') 

p5 <- wines_adjusted %>% ggplot(aes(x = alcohol)) + 
  geom_histogram(bins = 40, fill = 'lightblue') 

p6 <- wines_adjusted %>% ggplot(aes(x = volatileacidity)) + 
  geom_histogram(bins = 40, fill = 'lightblue') 

p7 <- wines_adjusted %>% ggplot(aes(x = totalsulfurdioxide)) + 
  geom_histogram(bins = 40, fill = 'lightblue') 

p8 <- wines_adjusted %>% ggplot(aes(x = citricacid)) + 
  geom_histogram(bins = 40, fill = 'lightblue') 

p9 <- wines_adjusted %>% ggplot(aes(x = density)) + 
  geom_histogram(bins = 40, fill = 'lightblue') 

p10 <- wines_adjusted %>% ggplot(aes(x = pH)) + 
  geom_histogram(bins = 40, fill = 'lightblue') 

p11 <- wines_adjusted %>% ggplot(aes(x = residualsugar)) + 
  geom_histogram(bins = 40, fill = 'lightblue') 

p12 <- wines_adjusted %>% ggplot(aes(x = quality)) + 
  geom_histogram(bins = 6, fill = 'lightblue') 

multiplot(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, cols = 3 )

5 Verificando a presença de outliers

Ao visualizar os boxplots abaixo, é possível verificar que existem inúmeros outliers, que provavelmente serão removidos para a modelagem.

par (mfrow=c(2,2))
boxplot(wines_adjusted$chlorides, main='chlorides')
boxplot(wines_adjusted$sulphates, main='sulphates')
boxplot(wines_adjusted$fixedacidity, main='fixedacidity')
boxplot(wines_adjusted$residualsugar, main='residualsugar')

boxplot(wines_adjusted$freesulfurdioxide, main='freesulfurdioxide')
boxplot(wines_adjusted$alcohol, main='alcohol')
boxplot(wines_adjusted$volatileacidity, main='volatileacidity')
boxplot(wines_adjusted$totalsulfurdioxide, main='totalsulfurdioxide')

boxplot(wines_adjusted$quality, main='quality')
boxplot(wines_adjusted$citricacid, main='citricacid')
boxplot(wines_adjusted$density, main='density')
boxplot(wines_adjusted$pH, main='pH')
par (mfrow=c(1,1))

5.1 Padronizando as variáveis

wines_padr <- preProcess(wines_adjusted[,1:11], c("center", "scale")) %>% 
  predict(., wines_adjusted) %>% 
  data.frame(trans = .)

colnames(wines_padr) <- colnames(wines_adjusted)

str(wines_padr)
'data.frame':   6497 obs. of  14 variables:
 $ fixedacidity      : num  -0.475 -0.397 2.611 -1.4 -0.397 ...
 $ volatileacidity   : num  -0.60537 0.00203 -0.18019 -0.96981 -0.24093 ...
 $ citricacid        : num  0.216 0.766 1.179 -0.541 0.835 ...
 $ residualsugar     : num  0.478 -0.813 -0.686 -0.135 2.817 ...
 $ chlorides         : num  -0.7146 -0.4291 0.1988 -0.4291 0.0276 ...
 $ freesulfurdioxide : num  0.3084 -0.0859 -0.7057 -0.0296 1.9423 ...
 $ totalsulfurdioxide: num  0.3407 -0.0309 -1.3401 -0.0486 1.9153 ...
 $ density           : num  -0.3019 -1.5394 0.983 -0.0821 1.6457 ...
 $ pH                : num  -0.1773 0.0715 -0.4882 1.2532 -0.6748 ...
 $ sulphates         : num  -1.08375 -0.61334 -0.14293 -0.88214 -0.00852 ...
 $ alcohol           : num  0.0114 1.739 -0.5645 -0.8936 -1.1404 ...
 $ quality           : int  5 6 6 6 5 5 4 6 7 6 ...
 $ type              : Factor w/ 2 levels "RED","WHITE": 2 2 1 2 2 1 1 1 2 2 ...
 $ good              : Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 2 2 2 ...
summary(wines_padr)
  fixedacidity     volatileacidity     citricacid       residualsugar       chlorides       freesulfurdioxide 
 Min.   :-2.6344   Min.   :-1.5772   Min.   :-2.19266   Min.   :-1.0243   Min.   :-1.3425   Min.   :-1.66345  
 1st Qu.:-0.6289   1st Qu.:-0.6661   1st Qu.:-0.47230   1st Qu.:-0.7704   1st Qu.:-0.5148   1st Qu.:-0.76202  
 Median :-0.1661   Median :-0.3017   Median :-0.05941   Median :-0.5164   Median :-0.2579   Median :-0.08594  
 Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000  
 3rd Qu.: 0.3739   3rd Qu.: 0.3665   3rd Qu.: 0.49111   3rd Qu.: 0.5629   3rd Qu.: 0.2559   3rd Qu.: 0.59014  
 Max.   : 6.6989   Max.   : 7.5338   Max.   : 9.23057   Max.   : 8.5416   Max.   :15.8410   Max.   :14.56245  
 totalsulfurdioxide    density               pH             sulphates          alcohol           quality     
 Min.   :-1.9416    Min.   :-2.56383   Min.   :-3.10038   Min.   :-2.0918   Min.   :-7.8396   Min.   :3.000  
 1st Qu.:-0.6855    1st Qu.:-0.79551   1st Qu.:-0.67481   1st Qu.:-0.6805   1st Qu.:-0.8113   1st Qu.:5.000  
 Median : 0.0399    Median : 0.06668   Median :-0.05287   Median :-0.1429   Median :-0.1532   Median :6.000  
 Mean   : 0.0000    Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   :5.818  
 3rd Qu.: 0.7122    3rd Qu.: 0.77672   3rd Qu.: 0.63126   3rd Qu.: 0.4619   3rd Qu.: 0.6695   3rd Qu.:6.000  
 Max.   : 5.7368    Max.   : 6.52124   Max.   : 4.92265   Max.   : 9.8701   Max.   : 3.6311   Max.   :9.000  
    type      good    
 RED  :1599   0:2384  
 WHITE:4898   1:4113  
                      
                      
                      
                      

5.2 Remoção dos outliers

Para a remoção dos outliers, foi utilizado o método Z-Score. Os valores com mais de 3 desvios padrões foram removidos.

wines_padr <- wines_padr[!abs(wines_padr$chlorides) > 3,] %>%
              .[!abs(.$sulphates) > 3,] %>%
              .[!abs(.$fixedacidity) > 3,] %>%
              .[!abs(.$residualsugar) > 3,] %>%
              .[!abs(.$freesulfurdioxide) > 3,] %>%
              .[!abs(.$alcohol) > 3,] %>%
              .[!abs(.$volatileacidity) > 3,] %>%
              .[!abs(.$totalsulfurdioxide) > 3,] %>%
              .[!abs(.$citricacid) > 3,] %>%
              .[!abs(.$density) > 3,] %>%
              .[!abs(.$pH) > 3,]

str(wines_padr)
'data.frame':   6007 obs. of  13 variables:
 $ fixedacidity      : num  -0.475 -0.397 2.611 -1.4 -0.397 ...
 $ volatileacidity   : num  -0.60537 0.00203 -0.18019 -0.96981 -0.24093 ...
 $ citricacid        : num  0.216 0.766 1.179 -0.541 0.835 ...
 $ residualsugar     : num  0.478 -0.813 -0.686 -0.135 2.817 ...
 $ chlorides         : num  -0.7146 -0.4291 0.1988 -0.4291 0.0276 ...
 $ freesulfurdioxide : num  0.3084 -0.0859 -0.7057 -0.0296 1.9423 ...
 $ totalsulfurdioxide: num  0.3407 -0.0309 -1.3401 -0.0486 1.9153 ...
 $ density           : num  -0.3019 -1.5394 0.983 -0.0821 1.6457 ...
 $ pH                : num  -0.1773 0.0715 -0.4882 1.2532 -0.6748 ...
 $ sulphates         : num  -1.08375 -0.61334 -0.14293 -0.88214 -0.00852 ...
 $ alcohol           : num  0.0114 1.739 -0.5645 -0.8936 -1.1404 ...
 $ quality           : int  5 6 6 6 5 5 4 6 7 5 ...
 $ type              : Factor w/ 2 levels "RED","WHITE": 2 2 1 2 2 1 1 1 2 2 ...
summary(wines_padr)
  fixedacidity      volatileacidity      citricacid       residualsugar        chlorides        freesulfurdioxide  totalsulfurdioxide
 Min.   :-2.55725   Min.   :-1.57721   Min.   :-2.19266   Min.   :-1.02435   Min.   :-1.34254   Min.   :-1.66345   Min.   :-1.94163  
 1st Qu.:-0.62888   1st Qu.:-0.72685   1st Qu.:-0.47230   1st Qu.:-0.77039   1st Qu.:-0.54330   1st Qu.:-0.70568   1st Qu.:-0.56163  
 Median :-0.24321   Median :-0.30167   Median :-0.05941   Median :-0.47410   Median :-0.28641   Median :-0.08594   Median : 0.07529  
 Mean   :-0.09333   Mean   :-0.08095   Mean   :-0.03948   Mean   : 0.01329   Mean   :-0.12702   Mean   : 0.01057   Mean   : 0.04015  
 3rd Qu.: 0.29673   3rd Qu.: 0.30573   3rd Qu.: 0.42229   3rd Qu.: 0.58408   3rd Qu.: 0.08467   3rd Qu.: 0.64648   3rd Qu.: 0.71221  
 Max.   : 2.99645   Max.   : 2.97828   Max.   : 2.89962   Max.   : 2.97556   Max.   : 2.99616   Max.   : 2.95642   Max.   : 2.94144  
    density                pH               sulphates           alcohol           quality        type     
 Min.   :-2.563832   Min.   :-2.9759884   Min.   :-2.09177   Min.   :-2.0453   Min.   :3.00   RED  :1277  
 1st Qu.:-0.856366   1st Qu.:-0.6748102   1st Qu.:-0.68054   1st Qu.:-0.8113   1st Qu.:5.00   WHITE:4730  
 Median : 0.002439   Median :-0.0528702   Median :-0.21013   Median :-0.1532   Median :6.00               
 Mean   :-0.065188   Mean   :-0.0002015   Mean   :-0.08039   Mean   : 0.0185   Mean   :5.84               
 3rd Qu.: 0.712475   3rd Qu.: 0.6312639   3rd Qu.: 0.39469   3rd Qu.: 0.6695   3rd Qu.:6.00               
 Max.   : 2.673526   Max.   : 2.9946361   Max.   : 2.94835   Max.   : 2.9319   Max.   :9.00               

6 Interação entre variáveis

6.1 Correlações

wines_padr %>%
  select(-type) %>%
  ggcorr(method = c("pairwise","spearman"), label = FALSE, angle = -0, hjust = 0.2) +
  coord_flip()
Coordinate system already present. Adding new coordinate system, which will replace the existing one.

wines_adjusted %>%
  select(-citricacid, -totalsulfurdioxide) %>%
  ggpairs()

6.2 Gráficos de dispersão

p1 <- wines_padr %>% 
  ggplot(aes(x = density, y = alcohol, color = type)) +
   geom_point(alpha = 0.2, size = 2) +
   geom_smooth(method = 'lm')

p2 <- wines_padr %>% 
  ggplot(aes(x = density, y = fixedacidity, color = type)) +
   geom_point(alpha = 0.2, size = 2) +
   geom_smooth(method = 'lm')

p3 <- wines_padr %>% 
  ggplot(aes(x = volatileacidity, y = quality, color = type)) +
   geom_point(alpha = 0.2, size = 2) +
   geom_smooth(method = 'lm')

p4 <- wines_padr %>% 
  ggplot(aes(x = alcohol, y = quality, color = type)) +
   geom_point(alpha = 0.2, size = 2) +
   geom_smooth(method = 'lm')

p5 <- wines_padr %>% 
  ggplot(aes(x = residualsugar, y = alcohol, color = type)) +
   geom_point(alpha = 0.2, size = 2) +
   geom_smooth(method = 'lm')

p6 <- wines_padr %>% 
  ggplot(aes(x = density, y = chlorides, color = type)) +
   geom_point(alpha = 0.2, size = 2) +
   geom_smooth(method = 'lm')

multiplot(p1, p2, p3, p4, p5, p6, cols = 2 )

7 Separação dos dados de treino e teste

Utilizarei 80% dos dados para treino, e 20% para teste.

set.seed(32312)

train_ind <- floor(0.8 * nrow(wines_padr)) %>%
  sample(seq_len(nrow(wines_padr)), size = .)

train <- wines_padr[train_ind, ]
test <- wines_padr[-train_ind, ]

8 Etapa 1 - Estimativa da variável quality

8.1 Regressão linear

Testando a regressão linear com todas as variáveis.

8.1.1 Primeira tentativa de se gerar um modelo

lm_01 <- lm(quality ~ fixedacidity + volatileacidity
               + citricacid + residualsugar + chlorides
               + freesulfurdioxide + totalsulfurdioxide
               + density + pH + sulphates + alcohol + type, data=train)
summary(lm_01)

Call:
lm(formula = quality ~ fixedacidity + volatileacidity + citricacid + 
    residualsugar + chlorides + freesulfurdioxide + totalsulfurdioxide + 
    density + pH + sulphates + alcohol + type, data = train)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.3562 -0.4791 -0.0462  0.4492  3.0019 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)         6.14330    0.05492 111.856  < 2e-16 ***
fixedacidity        0.16998    0.02777   6.121 1.00e-09 ***
volatileacidity    -0.22871    0.01724 -13.267  < 2e-16 ***
citricacid         -0.01724    0.01408  -1.224  0.22084    
residualsugar       0.35439    0.03726   9.510  < 2e-16 ***
chlorides          -0.02325    0.02737  -0.849  0.39566    
freesulfurdioxide   0.12012    0.01689   7.112 1.31e-12 ***
totalsulfurdioxide -0.06493    0.02159  -3.007  0.00265 ** 
density            -0.42574    0.06051  -7.036 2.25e-12 ***
pH                  0.11342    0.01820   6.232 5.01e-10 ***
sulphates           0.12198    0.01495   8.159 4.26e-16 ***
alcohol             0.22863    0.03046   7.506 7.23e-14 ***
typeWHITE          -0.42814    0.07238  -5.915 3.55e-09 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.7247 on 4792 degrees of freedom
Multiple R-squared:  0.2961,    Adjusted R-squared:  0.2943 
F-statistic:   168 on 12 and 4792 DF,  p-value: < 2.2e-16

Pelos resultados acima, é possível concluir que o modelo linear é estatisticamente significante, devido o p-value ser inferior a 0,05. Entretanto há variáveis que não são significantes, e podem ser removidas.

8.1.2 Segunda tentativa de se gerar um modelo

Como vimos na seção anterior, vamos remover citricacid, chlorides e totalsulfurdioxide e criar um novo modelo. Como vimos durante a análise exploratória, há uma correlação muito forte entre totalsulfurdioxide e freesulfurdioxide. Por esse motivo, utilizaremos somente uma delas.

lm_02 <- lm(quality ~ fixedacidity + volatileacidity
               + residualsugar + freesulfurdioxide + density 
               + pH + sulphates + alcohol + type, data=train)
summary(lm_02)

Call:
lm(formula = quality ~ fixedacidity + volatileacidity + residualsugar + 
    freesulfurdioxide + density + pH + sulphates + alcohol + 
    type, data = train)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.3258 -0.4823 -0.0465  0.4566  3.0180 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        6.23068    0.04580 136.041  < 2e-16 ***
fixedacidity       0.17508    0.02706   6.470 1.08e-10 ***
volatileacidity   -0.23022    0.01597 -14.412  < 2e-16 ***
residualsugar      0.37796    0.03602  10.494  < 2e-16 ***
freesulfurdioxide  0.08877    0.01368   6.489 9.53e-11 ***
density           -0.47691    0.05796  -8.228 2.43e-16 ***
pH                 0.12157    0.01787   6.802 1.16e-11 ***
sulphates          0.11968    0.01492   8.024 1.28e-15 ***
alcohol            0.21846    0.03024   7.224 5.85e-13 ***
typeWHITE         -0.54185    0.05929  -9.138  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.7254 on 4795 degrees of freedom
Multiple R-squared:  0.2943,    Adjusted R-squared:  0.2929 
F-statistic: 222.1 on 9 and 4795 DF,  p-value: < 2.2e-16

Apesar de todas as variáveis independentes possuírem um baixo p-value, o nosso R-Squared se encontra um pouco baixo. Não descartarei necessariamente o modelo devido a um valor pequeno do R quadrado. Nesse caso é melhor verificar a acurácia da predição nos dados de teste.

8.1.3 Análise do modelo escolhido

8.1.3.1 MSE

qualityPredict <- predict(lm_02, test, interval = "prediction", level = 0.95)

mse <- mean((test$quality  - qualityPredict[,1])^2)
sqrt(mse)
[1] 0.7274402

8.1.3.2 Erro usando média

erro_usando_media <- mean((test$quality  - mean(test$quality))^2)
sqrt(erro_usando_media)
[1] 0.8691678

8.1.3.3 Correlação entre valores reais e preditos

actuals_preds <- data.frame(cbind(actuals=test$quality, predicteds=qualityPredict[,1]))
correlation_accuracy <- cor(actuals_preds)
correlation_accuracy
            actuals predicteds
actuals    1.000000   0.547456
predicteds 0.547456   1.000000

8.1.3.4 Min Max Accuracy

min_max_accuracy <- mean(apply(actuals_preds, 1, min) / apply(actuals_preds, 1, max))
min_max_accuracy
[1] 0.9109673

8.1.3.5 MAPE

mape <- mean(abs((actuals_preds$predicteds - actuals_preds$actuals))/actuals_preds$actuals)
mape
[1] 0.7221432

8.1.3.6 Gráfico Resíduo

É possível notar que os resíduos estão dispersos aleatoriamente em torno de zero, com variância constante.

lm_02 %>%
  resid() %>%
  plot(predict(lm_02), ., xlab = "Preditor linear", ylab = "Residuos")
abline(h = 0, lty = 2)


lm_02 %>%
  resid() %>%
  hist(main='Histograma dos resíduos')

8.1.3.7 Distribuição normal dos erros

qqnorm(residuals(lm_02), ylab="Resíduos",xlab="Quantis teóricos",main="")
qqline(residuals(lm_02))

8.1.3.8 Teste de Shapiro

shapiro.test(sample(residuals(lm_02), size = 4800)) 

    Shapiro-Wilk normality test

data:  sample(residuals(lm_02), size = 4800)
W = 0.99177, p-value = 4.439e-16

8.1.3.9 K-fold cross validation

suppressWarnings(CVlm(data=wines_padr,
    form.lm = quality ~ fixedacidity + volatileacidity
               + residualsugar + freesulfurdioxide + density 
               + pH + sulphates + alcohol + type, m=5, dots=TRUE, seed=29, legend.pos="topleft",  printit=FALSE));

8.2 Árvore de Regressão

Para comparar com a regressão linear, foi feito o modelo de árvore de regressão.

8.2.1 Modelo

regression_tree <- rpart (quality ~ fixedacidity + volatileacidity
               + citricacid + residualsugar + chlorides
               + freesulfurdioxide + totalsulfurdioxide
               + density + pH + sulphates + alcohol + type, data=train, 
               cp = 0.001, minsplit = 15, maxdepth=15)

rpart.plot(regression_tree, type=4, extra=1, under=FALSE, clip.right.labs=TRUE,
           fallen.leaves=FALSE,   digits=2, varlen=-10, faclen=20,
           cex=0.4, tweak=1.7,
           compress=TRUE,box.palette="Grays",
           snip=FALSE)

8.2.2 MSE

val_pred_regression_tree <- predict(regression_tree, test, interval = "prediction", level = 0.95) 

mse_tree <- mean((test$quality  - val_pred_regression_tree)^2)
sqrt(mse_tree)
[1] 0.7261796

8.2.3 Correlação entre valores reais e preditos

actuals_preds <- data.frame(cbind(actuals=test$quality, predicteds=val_pred_regression_tree))
correlation_accuracy <- cor(actuals_preds)
correlation_accuracy
             actuals predicteds
actuals    1.0000000  0.5740011
predicteds 0.5740011  1.0000000

8.2.4 Gráfico Resíduo

rs <- val_pred_regression_tree - test$quality 
plot(predict(regression_tree, test), rs, xlab = "Com Árvore de Regressão", ylab = "Residuos")
abline(h = 0, lty = 2)


hist(rs)

8.2.5 Distribuição dos erros aleatórios

qqnorm(rs, ylab="Resíduos",xlab="Quantis teóricos",main="")
qqline(rs)

8.3 Conclusão

Embora com resultados muito parecidos, a técnica de árvore de regressão teve uma acurácia um pouco maior. É possível que o decepcionante resultado das duas técnicas se deve ao tratamento dos vinhos tintos juntamente com os vinhos brancos. Como deu para perceber pela análise exploratória, os dois tipos de vinhos possuem algumas características físico-químicas com comportamentos diferentes.

9 Etapa 2 - Categorizando vinhos “bons” e “ruins”

9.1 Criação da variável resposta categórica

wines_padr$good <- as.factor(ifelse(wines_padr$quality >= 6,1,0))
summary(wines_padr$good)
   0    1 
2384 4113 
train <- wines_padr[train_ind, ]
test <- wines_padr[-train_ind, ]

9.2 Árvore de Decisão

9.2.1 Construção do modelo

decision_tree <- rpart (train$good ~ fixedacidity + volatileacidity
                       + citricacid + residualsugar + chlorides
                       + freesulfurdioxide + totalsulfurdioxide
                       + density + pH + sulphates + alcohol + type, 
                       data = train)

fancyRpartPlot(decision_tree)

9.2.2 Avaliando o Modelo

9.2.2.1 Matriz de Confusão

decision_tree_predict <- predict(decision_tree, test, type='class')

confusion_matrix <- table(test$good, decision_tree_predict)
confusion_matrix
   decision_tree_predict
      0   1
  0 335 289
  1 165 903

9.2.2.2 Acurácia

diagonal <- diag(confusion_matrix)
Acc <-  sum(diagonal)/sum(confusion_matrix)
Acc
[1] 0.7512479

9.3 Regressão Logística

9.3.1 Primeiro Modelo

log_01 <- glm(train$good ~ fixedacidity + volatileacidity
               + citricacid + residualsugar + chlorides
               + freesulfurdioxide + totalsulfurdioxide
               + density + pH + sulphates + alcohol + type, train, family=binomial(link=logit))

summary(log_01)

Call:
glm(formula = train$good ~ fixedacidity + volatileacidity + citricacid + 
    residualsugar + chlorides + freesulfurdioxide + totalsulfurdioxide + 
    density + pH + sulphates + alcohol + type, family = binomial(link = logit), 
    data = train)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.7851  -0.9028   0.4281   0.8156   2.2976  

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)         1.45734    0.19125   7.620 2.54e-14 ***
fixedacidity        0.26705    0.09188   2.906 0.003656 ** 
volatileacidity    -0.80868    0.06112 -13.232  < 2e-16 ***
citricacid         -0.13495    0.04561  -2.959 0.003085 ** 
residualsugar       0.72859    0.12319   5.914 3.34e-09 ***
chlorides          -0.03862    0.09021  -0.428 0.668548    
freesulfurdioxide   0.33884    0.05720   5.924 3.14e-09 ***
totalsulfurdioxide -0.29247    0.07133  -4.100 4.13e-05 ***
density            -0.70905    0.20046  -3.537 0.000404 ***
pH                  0.16099    0.06049   2.661 0.007780 ** 
sulphates           0.40595    0.05437   7.467 8.21e-14 ***
alcohol             0.88727    0.10395   8.536  < 2e-16 ***
typeWHITE          -0.93732    0.24913  -3.762 0.000168 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 6266.8  on 4804  degrees of freedom
Residual deviance: 4912.8  on 4792  degrees of freedom
AIC: 4938.8

Number of Fisher Scoring iterations: 5

9.4 Segundo Modelo

O segundo modelo foi feito removendo as variáveis que possuíam um p-value muito elevado no primeiro modelo.

log_02 <- glm(train$good ~ fixedacidity + volatileacidity
               + citricacid + residualsugar
               + freesulfurdioxide + totalsulfurdioxide
               + density + pH + sulphates + alcohol + type, train, family=binomial(link=logit))

summary(log_02)

Call:
glm(formula = train$good ~ fixedacidity + volatileacidity + citricacid + 
    residualsugar + freesulfurdioxide + totalsulfurdioxide + 
    density + pH + sulphates + alcohol + type, family = binomial(link = logit), 
    data = train)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.7821  -0.9000   0.4288   0.8189   2.3022  

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)         1.44283    0.18810   7.671 1.71e-14 ***
fixedacidity        0.27297    0.09083   3.005 0.002654 ** 
volatileacidity    -0.81038    0.06099 -13.288  < 2e-16 ***
citricacid         -0.13585    0.04555  -2.983 0.002858 ** 
residualsugar       0.74017    0.12018   6.159 7.34e-10 ***
freesulfurdioxide   0.33905    0.05719   5.928 3.06e-09 ***
totalsulfurdioxide -0.29319    0.07131  -4.112 3.93e-05 ***
density            -0.72686    0.19607  -3.707 0.000210 ***
pH                  0.16513    0.05971   2.766 0.005678 ** 
sulphates           0.40672    0.05434   7.485 7.14e-14 ***
alcohol             0.88616    0.10391   8.528  < 2e-16 ***
typeWHITE          -0.91343    0.24263  -3.765 0.000167 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 6266.8  on 4804  degrees of freedom
Residual deviance: 4913.0  on 4793  degrees of freedom
AIC: 4937

Number of Fisher Scoring iterations: 5

9.4.1 Matriz de confusão

logistic_regression_predict <- predict(log_02, test, type="response") %>%
  cut(., breaks=c(0,0.50,1), right=F)

MC <- table(test$good,  logistic_regression_predict , deparse.level = 2) # montar a matriz de confusão  
show(MC)
         logistic_regression_predict
test$good [0,0.5) [0.5,1)
        0     257     181
        1     126     638

9.4.2 Acurácia

ACC = sum(diag(MC))/sum(MC) 
show(ACC)
[1] 0.7445923

9.4.3 Gráfico ROC

p <- predict(log_02, test, type="response")
pr <- prediction(p, test$good)
prf <- performance(pr, measure = "tpr", x.measure = "fpr")
plot(prf)


auc <- performance(pr, measure = "auc")
auc <- auc@y.values[[1]]
auc
[1] 0.8135444

9.5 Conclusão

As técnicas de árvore de decisão e regressão logística apresentaram resultados muito similares. Em contraste com a primeira etapa, os modelos para previsão de uma variável categórica de qualidade tiveram uma acurácia muito maior.

10 Etapa 3: Definir grupos de vinhos

wines_padr <- wines_padr %>% 
  select(-quality, -good, -type)

10.1 Determinação de quantidade clusters

wss <- (nrow(wines_padr)-1)*sum(apply(wines_padr,2,var))
for (i in 2:20) wss[i] <- sum(kmeans(wines_padr,
                                     centers=i)$withinss)
did not converge in 10 iterations
plot(1:20, wss, type="b", xlab="Número de clusters",
     ylab="Soma de quadrados intra-clusters") 

Analisando o gráfico acima, optei por dividir os vinhos em 6 grupos distintos.

10.2 Cluster Hierárquico

Identificando os grupos no Dendograma.

hier_cluster <- hclust(dist(wines_padr),method='ward.D2')
d <- dist(wines_padr, method = "euclidean")
plot(hier_cluster, ylab='distancia', cex=0.6)

groups <- cutree(hier_cluster, k=6) #Identificando os 6 diferentes grupos
rect.hclust(hier_cluster, k=6, border="red") 

10.3 K-Means

set.seed(1232)
output_cluster <- kmeans(wines_padr, 6, iter=100)
output_cluster
K-means clustering with 6 clusters of sizes 946, 1206, 1483, 1219, 646, 997

Cluster means:
  fixedacidity volatileacidity  citricacid residualsugar  chlorides freesulfurdioxide totalsulfurdioxide    density          pH
1   0.08473360       1.6866294 -1.26213820    -0.6317944  0.6817667        -0.7986723        -1.16915557  0.5037673  0.96129972
2  -0.52520677      -0.2668938 -0.02511597    -0.4822657 -0.5802020        -0.1213941        -0.16797325 -1.3589390 -0.04482517
3  -0.17028943      -0.3492428  0.31064945     1.4659004 -0.1443988         0.9372052         0.99798842  0.9223711 -0.49867076
4   0.09639664      -0.4497086  0.23995436    -0.2777307 -0.2107031        -0.2721956         0.07332153 -0.4284112 -0.72419807
5   2.00659014       0.5019104  0.95725823    -0.5616504  1.2624821        -0.8942313        -1.25067366  1.0021260 -0.06731150
6  -0.60981357      -0.5333900 -0.14775699    -0.2941430 -0.2906727         0.4228183         0.54878266 -0.3316950  0.81291779
    sulphates     alcohol
1  0.40065402 -0.24807743
2 -0.30296210  1.39885650
3 -0.27728431 -0.84938155
4 -0.50528600 -0.08757578
5  1.42050025  0.02119780
6  0.09615516 -0.09994560

Clustering vector:
   [1] 4 2 5 6 3 1 1 1 2 3 3 6 6 6 4 2 1 4 1 6 6 3 1 4 4 4 6 5 1 3 2 1 5 4 1 6 6 3 2 3 3 3 6 5 2 3 4 3 2 2 4 2 6 6 1 6 4 6 1 1 2 3
  [63] 4 4 1 2 4 5 2 3 6 4 3 1 4 5 5 6 2 3 2 2 5 5 5 1 2 3 1 2 5 6 2 2 3 2 3 4 2 2 1 4 2 4 6 1 4 2 4 6 3 4 4 4 4 3 2 4 6 2 4 6 5 3
 [125] 6 3 4 6 1 1 6 5 6 2 3 1 1 3 4 3 3 3 2 3 2 2 5 3 1 1 6 4 4 2 1 6 3 3 5 3 3 6 3 5 2 2 1 4 6 4 1 1 3 3 2 6 2 1 6 3 2 1 4 1 4 6
 [187] 3 6 1 3 4 1 3 2 5 2 6 3 2 3 4 2 3 6 3 6 3 6 5 3 1 2 6 5 5 6 3 2 4 3 3 3 4 4 3 1 1 4 2 3 1 2 4 3 1 4 3 1 5 3 5 1 1 3 2 6 6 6
 [249] 3 2 4 6 2 2 4 3 5 3 3 3 3 1 1 2 1 2 2 6 4 3 2 4 3 3 1 5 2 3 5 1 2 3 5 1 6 5 2 5 3 2 3 6 2 2 1 6 1 3 5 6 3 3 1 5 4 3 2 1 1 3
 [311] 4 1 5 1 2 3 1 4 1 1 4 6 6 3 6 2 4 1 1 3 5 4 3 3 5 1 4 3 5 3 2 5 4 3 6 6 1 5 4 2 2 4 4 6 4 4 3 2 6 6 4 2 3 2 6 1 2 3 2 4 3 4
 [373] 3 1 4 1 2 6 4 6 5 5 3 6 3 1 2 4 2 2 1 2 6 5 3 6 5 1 4 4 1 4 3 4 2 3 6 2 5 1 4 4 2 6 3 6 4 3 5 1 2 3 3 3 3 3 2 6 2 2 6 1 2 2
 [435] 4 1 3 4 2 2 3 4 3 3 3 3 1 6 1 1 6 4 4 4 6 3 3 2 4 4 2 3 1 6 6 4 2 6 4 2 2 2 2 4 3 4 6 3 6 2 4 5 2 2 6 6 3 2 4 3 4 4 2 4 2 1
 [497] 3 4 5 3 2 4 6 2 4 6 5 6 4 6 5 4 1 6 3 3 5 6 2 3 1 1 3 6 6 4 6 3 6 4 6 2 2 1 6 2 1 3 1 6 3 4 1 4 2 5 6 6 1 4 3 3 3 6 2 6 5 2
 [559] 5 6 4 2 3 3 6 2 3 6 2 5 4 4 6 2 4 4 3 3 3 2 6 5 2 4 3 3 5 2 1 3 4 1 1 3 1 2 3 3 2 3 4 3 3 3 5 1 1 1 1 4 3 4 4 4 5 3 2 1 6 5
 [621] 2 2 4 4 2 1 1 1 2 2 3 5 1 6 3 3 4 4 3 5 2 3 4 3 2 2 6 6 3 3 3 2 4 5 2 2 1 3 2 5 5 2 2 5 5 3 6 4 2 1 6 3 4 2 4 2 3 1 4 3 1 4
 [683] 5 3 3 2 1 4 3 4 1 1 2 2 1 2 3 3 3 3 2 5 4 2 1 2 4 3 2 1 6 1 2 4 3 4 3 4 2 6 3 3 6 3 3 6 6 4 6 4 4 1 2 6 3 3 6 2 5 5 3 4 2 4
 [745] 1 2 2 1 2 5 5 1 6 5 6 6 2 5 1 1 6 1 4 2 1 2 5 3 6 1 3 1 3 6 2 5 2 4 4 2 3 4 4 2 1 6 2 6 6 1 1 5 6 4 5 4 3 1 4 2 2 4 6 4 1 6
 [807] 3 1 1 4 4 3 3 5 3 6 5 4 6 6 1 1 5 1 6 4 3 1 2 4 4 6 3 3 4 4 4 3 4 4 4 1 3 6 3 3 3 6 2 5 5 6 4 3 4 2 4 2 3 1 2 2 2 5 6 4 1 6
 [869] 6 3 4 3 4 6 5 4 6 2 6 5 1 1 2 1 2 4 5 6 3 4 2 1 2 2 5 6 6 1 2 4 1 2 3 2 3 3 3 1 3 4 1 2 4 3 6 5 3 1 5 2 2 4 1 4 1 4 1 2 4 6
 [931] 6 1 3 2 2 6 5 3 6 4 5 1 4 5 1 6 1 1 6 6 2 2 2 6 4 2 1 2 3 2 2 2 3 3 5 6 3 6 5 5 3 5 2 1 1 2 6 4 2 2 2 4 5 3 3 6 6 2 6 1 6 1
 [993] 4 3 2 6 5 6 3 4
 [ reached getOption("max.print") -- omitted 5497 entries ]

Within cluster sum of squares by cluster:
[1] 5121.289 4905.915 7942.310 5922.045 7546.492 4797.123
 (between_SS / total_SS =  49.3 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      
table(output_cluster$cluster)

   1    2    3    4    5    6 
 946 1206 1483 1219  646  997 

Visualizando o resultado de uma forma gráfica

plot(tkmeans(wines_padr , k = 6, alpha = 0.01))

10.4 PCA

Vamos tentar reduzir o número de variáveis e verificar o impacto que isso terá na clusterização.

acpcor_wines <- prcomp(wines_padr, scale = TRUE, retx = TRUE) 
summary(acpcor_wines)
Importance of components:
                          PC1    PC2    PC3     PC4     PC5     PC6     PC7     PC8    PC9    PC10    PC11
Standard deviation     1.7412 1.5791 1.2464 0.98545 0.84537 0.77894 0.72356 0.70943 0.5821 0.47810 0.18564
Proportion of Variance 0.2756 0.2267 0.1412 0.08828 0.06497 0.05516 0.04759 0.04575 0.0308 0.02078 0.00313
Cumulative Proportion  0.2756 0.5023 0.6435 0.73181 0.79678 0.85194 0.89953 0.94528 0.9761 0.99687 1.00000
plot(1:ncol(wines_padr), acpcor_wines$sdev^2, type = "b", xlab = "Componente",
     ylab = "Variância", pch = 20, cex.axis = 0.8, cex.lab = 0.8)

Pelas informações acima, é possível verificar que ao optar pelos 6 primeiros componentes, obtém-se 85% da variância dos dados.

escore1 <- acpcor_wines$x[, 1]
escore2 <- acpcor_wines$x[, 2]
escore3 <- acpcor_wines$x[, 3]
escore4 <- acpcor_wines$x[, 4]
escore5 <- acpcor_wines$x[, 5]
escore6 <- acpcor_wines$x[, 6]

wines_cpa <-cbind(escore1, escore2, escore3, escore4, escore5, escore6)

wss <- (nrow(wines_cpa)-1)*sum(apply(wines_cpa,2,var))
for (i in 2:20) wss[i] <- sum(kmeans(wines_cpa,
                                     centers=i)$withinss)
plot(1:20, wss, type="b", xlab="Number of Clusters",
     ylab="Within groups sum of squares") 

10.4.1 K-Means com PCA

set.seed(5425)
output_cluster_pca <- kmeans(wines_cpa, 6,iter=100)
output_cluster
K-means clustering with 6 clusters of sizes 1232, 930, 1422, 1128, 1130, 655

Cluster means:
     escore1    escore2    escore3     escore4     escore5     escore6
1 -0.5561151  0.2304678 -0.5060747  0.40461549 -0.07829805 -0.54193526
2  2.5395077 -0.1482698 -1.4627894 -0.38520916 -0.12219321  0.13766453
3 -1.9186941 -1.5728764 -0.4141836 -0.09042038  0.19038205  0.18280241
4 -0.3889762  0.7653344  1.1073630 -0.66952847 -0.24478532 -0.02662016
5 -0.1498876  2.1033691  0.2640424  0.39209807  0.03840748  0.24111008
6  2.5342180 -1.7549915  1.5654563  0.45876841  0.26274350  0.05689213

Clustering vector:
   [1] 1 5 6 1 3 2 2 2 5 3 3 1 1 1 4 5 2 4 2 1 1 3 2 1 4 4 1 6 2 3 5 2 6 1 2 1 1 3 4 3 3 3 1 6 5 3 4 3 4 4 4 5 1 1 2 5 4 1 2 2 5 3
  [63] 4 4 2 4 4 6 5 3 1 4 3 2 4 6 6 5 5 3 5 5 6 6 6 2 5 3 2 5 6 1 4 5 3 5 3 4 5 5 2 4 5 4 1 2 4 4 4 5 3 4 4 1 4 3 5 1 1 5 4 1 6 3
 [125] 1 3 1 1 2 2 1 6 1 5 3 2 2 3 4 3 3 3 5 3 4 5 6 3 2 2 1 4 4 5 2 1 3 3 6 3 3 1 3 6 5 4 2 4 1 4 2 2 3 3 5 1 4 2 1 3 4 2 1 2 4 1
 [187] 3 1 2 3 1 2 3 4 6 5 5 3 5 3 1 5 3 1 3 1 3 5 6 3 2 5 5 6 6 1 3 5 4 3 3 3 4 1 3 2 2 4 5 4 2 5 4 3 2 1 3 2 6 3 6 2 2 3 5 1 1 1
 [249] 3 5 4 1 5 5 4 3 6 3 4 3 3 2 2 5 2 5 5 5 4 3 5 4 3 3 2 6 4 3 6 2 5 3 6 2 1 6 5 6 3 4 3 1 5 5 2 1 2 3 6 1 3 3 2 6 1 3 5 2 2 3
 [311] 4 2 6 2 5 3 2 1 2 2 1 1 5 3 1 5 4 2 2 3 6 4 3 3 6 2 4 3 6 3 5 6 4 3 5 1 2 6 4 5 4 4 4 5 1 4 3 5 5 1 4 5 3 5 5 2 5 3 5 4 3 4
 [373] 3 2 1 2 5 5 4 1 6 6 3 1 3 2 5 4 5 5 2 4 1 6 3 1 6 2 4 1 2 4 3 4 5 3 1 5 6 2 4 4 5 1 3 5 4 3 6 2 5 3 3 3 1 3 5 1 5 5 1 2 5 5
 [435] 3 2 3 4 5 5 3 4 3 3 3 3 2 1 2 2 1 1 1 4 1 3 3 5 4 4 5 1 2 1 5 4 5 1 4 5 4 5 5 1 3 4 1 3 1 5 4 6 5 5 1 1 1 5 4 3 1 1 5 1 4 2
 [497] 3 4 6 3 5 4 1 5 4 1 6 1 1 1 6 1 2 1 3 3 2 5 5 3 2 2 3 1 1 4 1 1 5 1 1 4 5 2 1 4 2 3 2 1 3 4 2 1 5 6 1 1 2 4 1 3 3 1 5 1 6 4
 [559] 6 1 4 5 3 3 1 5 3 1 4 6 4 4 1 4 4 1 3 3 3 5 1 6 4 4 3 1 6 5 2 3 4 2 6 3 2 5 3 3 5 3 1 3 3 3 6 2 2 2 2 4 3 4 4 1 6 3 5 2 1 6
 [621] 4 5 3 4 5 2 5 2 5 5 3 6 2 1 3 3 4 1 3 6 5 3 4 3 5 5 1 1 3 3 3 5 4 6 4 5 2 1 4 6 6 4 5 6 6 1 1 4 4 2 1 3 4 4 4 5 3 2 4 4 2 1
 [683] 6 3 3 4 2 4 3 4 2 2 5 5 2 4 3 3 3 3 4 6 1 4 2 4 4 3 5 2 1 2 5 4 3 4 3 4 5 1 3 3 1 3 3 1 5 4 1 4 4 2 5 1 3 3 1 5 6 6 3 4 5 1
 [745] 2 5 4 2 5 6 6 2 1 6 5 1 4 6 2 2 1 2 1 5 2 5 6 1 1 2 3 2 3 1 5 6 5 4 3 5 3 4 1 4 2 1 5 1 1 2 2 6 1 4 6 4 1 2 1 5 5 4 1 4 2 5
 [807] 3 1 2 4 4 3 3 6 3 1 6 4 1 1 2 2 6 2 1 1 3 2 5 4 5 5 3 3 1 4 4 3 4 1 4 2 3 1 3 3 3 1 4 6 6 5 1 3 4 4 4 5 3 2 4 4 5 6 1 4 2 1
 [869] 1 3 4 3 1 1 6 1 1 5 2 6 2 2 5 2 4 4 6 1 3 4 4 4 5 5 6 1 1 2 5 4 2 4 3 5 3 3 3 2 3 1 2 5 4 1 1 6 3 2 6 4 5 4 2 4 2 1 2 5 4 1
 [931] 1 2 3 4 4 1 6 3 5 4 6 2 4 6 2 1 2 2 1 1 5 5 5 1 4 5 2 5 3 5 4 5 3 3 6 1 3 1 6 6 3 6 5 2 2 5 1 1 5 5 5 4 6 3 3 5 1 5 1 2 1 2
 [993] 1 3 4 1 6 1 3 4
 [ reached getOption("max.print") -- omitted 5497 entries ]

Within cluster sum of squares by cluster:
[1] 4526.465 3837.918 4984.047 3541.824 3502.742 5717.291
 (between_SS / total_SS =  57.1 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      
table(output_cluster_pca$cluster)

   1    2    3    4    5    6 
1232  930 1422 1128 1130  655 
plot(tkmeans(wines_cpa , k = 6, alpha = 0.01))

10.4.2 Cruzando os clusters

Agora vou cruzar os resultados obtidos a partir das variáveis com os resultados obitos pelos componentes principais.

cruzamento <- data.frame(cbind(output_cluster$cluster, output_cluster_pca$cluster))
colnames(cruzamento) <- c("sem_pca", "com_pca")
cruzamento %>%
  mutate(sem_pca = as.factor(sem_pca), com_pca = as.factor(com_pca)) %>%
  ggplot(aes(sem_pca, com_pca)) +
  geom_count(colour = "blue") +
  theme(legend.position = "bottom")

O count plot acima indica que, apesar de haver mudança na forma como os vinhos foram classificados, em sua maioria os agrupamentos continuaram parecidos.

LS0tCnRpdGxlOiAiVHJhYmFsaG8gRmluYWwgZGUgQ29uY2VpdG9zIEVzdGF0w61zdGljb3MgcGFyYSBJQSIKb3V0cHV0OiAKICBodG1sX25vdGVib29rOgogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCi0tLQoKQWx1bm86IEZlbGlwZSBNYXJ0aW0gRmVybmFuZGVzIFZpZWlyYQoKIyBJbnRyb2R1w6fDo28KCk8gb2JqZXRpdm8gZGVzc2UgdHJhYmFsaG8gw6kgZmF6ZXIgYSBhbsOhbGlzZSBleHBsb3JhdMOzcmlhIGRlIHVtYSBiYXNlIGRlIGRhZG9zIGRlIHZpbmhvcyBkZSB1bWEgcmVnacOjbyBkZSBQb3J0dWdhbCwgZSBjb25zdHJ1aXIgbW9kZWxvcyB1dGlsaXphbmRvIGFzIHTDqWNuaWNhcyBhcHJlbmRpZGFzIGVtIHNhbGEgZGUgYXVsYS4gQXMgZGlmZXJlbnRlcyBldGFwYXMgZG8gdHJhYmFsaG8gc2VndWVtIGFiYWl4bzoKCjEuIEVzdGltYXIgYSB2YXJpw6F2ZWwgInF1YWxpdHkiIGVtIGZ1bsOnw6NvIGRhcyBjYXJhY3RlcsOtc3RpY2FzIGbDrXNpY28tcXXDrW1pY2FzIGRvcyB2aW5ob3MgYXRyYXbDqXMgZGEgY29uc3RydcOnw6NvIGRvcyBtb2RlbG9zIHByZWRpdGl2b3MgbGlzdGFkb3MuCiAgICArIFJlZ3Jlc3PDo28gTGluZWFyCiAgICArIMOBcnZvcmUgZGUgUmVncmVzc8OjbwogICAgCjIuIENhdGVnb3JpemHDp8OjbyBkb3MgdmluaG9zIGVtICJib25zIiBvdSAicnVpbnMiLCBzZW5kbyBxdWUgb3MgdmluaG9zIGNvbSBub3RhcyBtYWlvcmVzIG91IGlndWFpcyBhIDYgc2Vyw6NvIGNvbnNpZGVyYWRvcyBkZSBib2EgcXVhbGlkYWRlLgogICAgKyBSZWdyZXNzw6NvIExvZ8Otc3RpY2EKICAgICsgw4Fydm9yZSBkZSBEZWNpc8OjbwogICAgCjMuIERlZmluaXIgZ3J1cG9zIGRlIHZpbmhvcyB1dGlsaXphbmRvIG3DqXRvZG9zIGRlIF9jbHVzdGVyaXphw6fDo29fCiAgICArIEhpZXLDoXJxdWljYQogICAgKyBLLU1lYW5zCiAgICArIEFuw6FsaXNlIGRlIENvbXBvbmVudGVzIFByaW5jaXBhaXMKICAgIAoKIyBQcmVwYXJhw6fDtWVzCgojIyBDYXJyZWdhbmRvIGFzIGJpYmxpb3RlY2FzIG5lY2Vzc8OhcmlhcwoKYGBge3J9CmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoY29ycmdyYW0pCmxpYnJhcnkoJ0dHYWxseScpCmxpYnJhcnkocGxvdGx5KQpsaWJyYXJ5KGNhcmV0KQpsaWJyYXJ5KGUxMDcxKQpsaWJyYXJ5KGxtdGVzdCkKbGlicmFyeShEQUFHKQpsaWJyYXJ5KHJwYXJ0KQpsaWJyYXJ5KHJwYXJ0LnBsb3QpCmxpYnJhcnkocmF0dGxlKQpsaWJyYXJ5KHRjbHVzdCkKbGlicmFyeShjbHVzdGVyKQpsaWJyYXJ5KGZwYykKbGlicmFyeShST0NSKQpgYGAKCgojIyBGdW7Dp8O1ZXMgYXV4aWxpYXJlcwoKIyMjIE11bHRpcGxvdAoKYGBge3J9CiMgTXVsdGlwbGUgcGxvdCBmdW5jdGlvbgojCiMgZ2dwbG90IG9iamVjdHMgY2FuIGJlIHBhc3NlZCBpbiAuLi4sIG9yIHRvIHBsb3RsaXN0IChhcyBhIGxpc3Qgb2YgZ2dwbG90IG9iamVjdHMpCiMgLSBjb2xzOiAgIE51bWJlciBvZiBjb2x1bW5zIGluIGxheW91dAojIC0gbGF5b3V0OiBBIG1hdHJpeCBzcGVjaWZ5aW5nIHRoZSBsYXlvdXQuIElmIHByZXNlbnQsICdjb2xzJyBpcyBpZ25vcmVkLgojCiMgSWYgdGhlIGxheW91dCBpcyBzb21ldGhpbmcgbGlrZSBtYXRyaXgoYygxLDIsMywzKSwgbnJvdz0yLCBieXJvdz1UUlVFKSwKIyB0aGVuIHBsb3QgMSB3aWxsIGdvIGluIHRoZSB1cHBlciBsZWZ0LCAyIHdpbGwgZ28gaW4gdGhlIHVwcGVyIHJpZ2h0LCBhbmQKIyAzIHdpbGwgZ28gYWxsIHRoZSB3YXkgYWNyb3NzIHRoZSBib3R0b20uCiMKbXVsdGlwbG90IDwtIGZ1bmN0aW9uKC4uLiwgcGxvdGxpc3Q9TlVMTCwgZmlsZSwgY29scz0xLCBsYXlvdXQ9TlVMTCkgewogIGxpYnJhcnkoZ3JpZCkKICAKICAjIE1ha2UgYSBsaXN0IGZyb20gdGhlIC4uLiBhcmd1bWVudHMgYW5kIHBsb3RsaXN0CiAgcGxvdHMgPC0gYyhsaXN0KC4uLiksIHBsb3RsaXN0KQogIAogIG51bVBsb3RzID0gbGVuZ3RoKHBsb3RzKQogIAogICMgSWYgbGF5b3V0IGlzIE5VTEwsIHRoZW4gdXNlICdjb2xzJyB0byBkZXRlcm1pbmUgbGF5b3V0CiAgaWYgKGlzLm51bGwobGF5b3V0KSkgewogICAgIyBNYWtlIHRoZSBwYW5lbAogICAgIyBuY29sOiBOdW1iZXIgb2YgY29sdW1ucyBvZiBwbG90cwogICAgIyBucm93OiBOdW1iZXIgb2Ygcm93cyBuZWVkZWQsIGNhbGN1bGF0ZWQgZnJvbSAjIG9mIGNvbHMKICAgIGxheW91dCA8LSBtYXRyaXgoc2VxKDEsIGNvbHMgKiBjZWlsaW5nKG51bVBsb3RzL2NvbHMpKSwKICAgICAgICAgICAgICAgICAgICAgbmNvbCA9IGNvbHMsIG5yb3cgPSBjZWlsaW5nKG51bVBsb3RzL2NvbHMpKQogIH0KICAKICBpZiAobnVtUGxvdHM9PTEpIHsKICAgIHByaW50KHBsb3RzW1sxXV0pCiAgICAKICB9IGVsc2UgewogICAgIyBTZXQgdXAgdGhlIHBhZ2UKICAgIGdyaWQubmV3cGFnZSgpCiAgICBwdXNoVmlld3BvcnQodmlld3BvcnQobGF5b3V0ID0gZ3JpZC5sYXlvdXQobnJvdyhsYXlvdXQpLCBuY29sKGxheW91dCkpKSkKICAgIAogICAgIyBNYWtlIGVhY2ggcGxvdCwgaW4gdGhlIGNvcnJlY3QgbG9jYXRpb24KICAgIGZvciAoaSBpbiAxOm51bVBsb3RzKSB7CiAgICAgICMgR2V0IHRoZSBpLGogbWF0cml4IHBvc2l0aW9ucyBvZiB0aGUgcmVnaW9ucyB0aGF0IGNvbnRhaW4gdGhpcyBzdWJwbG90CiAgICAgIG1hdGNoaWR4IDwtIGFzLmRhdGEuZnJhbWUod2hpY2gobGF5b3V0ID09IGksIGFyci5pbmQgPSBUUlVFKSkKICAgICAgCiAgICAgIHByaW50KHBsb3RzW1tpXV0sIHZwID0gdmlld3BvcnQobGF5b3V0LnBvcy5yb3cgPSBtYXRjaGlkeCRyb3csCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGF5b3V0LnBvcy5jb2wgPSBtYXRjaGlkeCRjb2wpKQogICAgfQogIH0KfQpgYGAKCiMjIENhcnJlZ2FuZG8gb3MgZGFkb3MKCmBgYHtyfQpzZXR3ZCgnfi93b3Jrc3BhY2UvZmlhcC93aW5lJykKCndpbmVzIDwtIHJlYWQuY3N2MihmaWxlPSJCYXNlV2luZV9SZWRfZV9XaGl0ZS5jc3YiCiAgICAgICAgICAgICAgICAgICAsIGhlYWRlcj1UUlVFCiAgICAgICAgICAgICAgICAgICAsIHNlcD0iOyIpCgpgYGAKCiMgVmlzw6NvIGdlcmFsIGRvcyBkYWRvcwoKYGBge3J9CnN0cih3aW5lcykKc3VtbWFyeSh3aW5lcykKYGBgCgpWZXJpZmljYW5kbyBzZSBow6EgdmFsb3JlcyBmYWx0YW50ZXMKCmBgYHtyfQpzdW0oaXMubmEod2luZXMpKQpgYGAKCiMjIFJlbm9tZWFuZG8gZSByZW1vdmVuZG8gdmFyacOhdmVpcwoKQSB2YXJpw6F2ZWwgX2lkX3ZpbmhvXyBuw6NvIMOpIG5lY2Vzc8OhcmlhIHBhcmEgYSBub3NzYSBhbsOhbGlzZSBlIHNlcsOhIHJlbW92aWRhLiBQYXJhIG1hbnRlciBhIGNvbnNpc3TDqm5jaWEgbmEgbm9tZW5jbGF0dXJhLCByZW5vbWVlaSBfVmluaG9fIHBhcmEgX3R5cGVfLgoKYGBge3J9CndpbmVzX2FkanVzdGVkIDwtIHdpbmVzICU+JSBzZWxlY3QoLWlkX3ZpbmhvKSAlPiUgcmVuYW1lKHR5cGUgPSBWaW5obykKYGBgCgojIFZpc3VhbGl6YW5kbyBhcyBjYXJhY3RlcsOtc3RpY2FzIGluZGl2aWR1YWxtZW50ZQoKCmBgYHtyfQpwMSA8LSB3aW5lc19hZGp1c3RlZCAlPiUgZ2dwbG90KGFlcyh4ID0gY2hsb3JpZGVzKSkgKyAKICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gNDAsIGZpbGwgPSAnbGlnaHRibHVlJykKCnAyIDwtIHdpbmVzX2FkanVzdGVkICU+JSBnZ3Bsb3QoYWVzKHggPSBzdWxwaGF0ZXMpKSArIAogIGdlb21faGlzdG9ncmFtKGJpbnMgPSA0MCwgZmlsbCA9ICdsaWdodGJsdWUnKSAKCnAzIDwtIHdpbmVzX2FkanVzdGVkICU+JSBnZ3Bsb3QoYWVzKHggPSBmaXhlZGFjaWRpdHkpKSArIAogIGdlb21faGlzdG9ncmFtKGJpbnMgPSA0MCwgZmlsbCA9ICdsaWdodGJsdWUnKSAKCnA0IDwtIHdpbmVzX2FkanVzdGVkICU+JSBnZ3Bsb3QoYWVzKHggPSBmcmVlc3VsZnVyZGlveGlkZSkpICsgCiAgZ2VvbV9oaXN0b2dyYW0oYmlucyA9IDQwLCBmaWxsID0gJ2xpZ2h0Ymx1ZScpIAoKcDUgPC0gd2luZXNfYWRqdXN0ZWQgJT4lIGdncGxvdChhZXMoeCA9IGFsY29ob2wpKSArIAogIGdlb21faGlzdG9ncmFtKGJpbnMgPSA0MCwgZmlsbCA9ICdsaWdodGJsdWUnKSAKCnA2IDwtIHdpbmVzX2FkanVzdGVkICU+JSBnZ3Bsb3QoYWVzKHggPSB2b2xhdGlsZWFjaWRpdHkpKSArIAogIGdlb21faGlzdG9ncmFtKGJpbnMgPSA0MCwgZmlsbCA9ICdsaWdodGJsdWUnKSAKCnA3IDwtIHdpbmVzX2FkanVzdGVkICU+JSBnZ3Bsb3QoYWVzKHggPSB0b3RhbHN1bGZ1cmRpb3hpZGUpKSArIAogIGdlb21faGlzdG9ncmFtKGJpbnMgPSA0MCwgZmlsbCA9ICdsaWdodGJsdWUnKSAKCnA4IDwtIHdpbmVzX2FkanVzdGVkICU+JSBnZ3Bsb3QoYWVzKHggPSBjaXRyaWNhY2lkKSkgKyAKICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gNDAsIGZpbGwgPSAnbGlnaHRibHVlJykgCgpwOSA8LSB3aW5lc19hZGp1c3RlZCAlPiUgZ2dwbG90KGFlcyh4ID0gZGVuc2l0eSkpICsgCiAgZ2VvbV9oaXN0b2dyYW0oYmlucyA9IDQwLCBmaWxsID0gJ2xpZ2h0Ymx1ZScpIAoKcDEwIDwtIHdpbmVzX2FkanVzdGVkICU+JSBnZ3Bsb3QoYWVzKHggPSBwSCkpICsgCiAgZ2VvbV9oaXN0b2dyYW0oYmlucyA9IDQwLCBmaWxsID0gJ2xpZ2h0Ymx1ZScpIAoKcDExIDwtIHdpbmVzX2FkanVzdGVkICU+JSBnZ3Bsb3QoYWVzKHggPSByZXNpZHVhbHN1Z2FyKSkgKyAKICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gNDAsIGZpbGwgPSAnbGlnaHRibHVlJykgCgpwMTIgPC0gd2luZXNfYWRqdXN0ZWQgJT4lIGdncGxvdChhZXMoeCA9IHF1YWxpdHkpKSArIAogIGdlb21faGlzdG9ncmFtKGJpbnMgPSA2LCBmaWxsID0gJ2xpZ2h0Ymx1ZScpIAoKbXVsdGlwbG90KHAxLCBwMiwgcDMsIHA0LCBwNSwgcDYsIHA3LCBwOCwgcDksIHAxMCwgcDExLCBwMTIsIGNvbHMgPSAzICkKYGBgCgojIFZlcmlmaWNhbmRvIGEgcHJlc2Vuw6dhIGRlIF9vdXRsaWVyc18KCkFvIHZpc3VhbGl6YXIgb3MgYm94cGxvdHMgYWJhaXhvLCDDqSBwb3Nzw612ZWwgdmVyaWZpY2FyIHF1ZSBleGlzdGVtIGluw7ptZXJvcyBvdXRsaWVycywgcXVlIHByb3ZhdmVsbWVudGUgc2Vyw6NvIHJlbW92aWRvcyBwYXJhIGEgbW9kZWxhZ2VtLgoKYGBge3J9CnBhciAobWZyb3c9YygyLDIpKQpib3hwbG90KHdpbmVzX2FkanVzdGVkJGNobG9yaWRlcywgbWFpbj0nY2hsb3JpZGVzJykKYm94cGxvdCh3aW5lc19hZGp1c3RlZCRzdWxwaGF0ZXMsIG1haW49J3N1bHBoYXRlcycpCmJveHBsb3Qod2luZXNfYWRqdXN0ZWQkZml4ZWRhY2lkaXR5LCBtYWluPSdmaXhlZGFjaWRpdHknKQpib3hwbG90KHdpbmVzX2FkanVzdGVkJHJlc2lkdWFsc3VnYXIsIG1haW49J3Jlc2lkdWFsc3VnYXInKQpib3hwbG90KHdpbmVzX2FkanVzdGVkJGZyZWVzdWxmdXJkaW94aWRlLCBtYWluPSdmcmVlc3VsZnVyZGlveGlkZScpCmJveHBsb3Qod2luZXNfYWRqdXN0ZWQkYWxjb2hvbCwgbWFpbj0nYWxjb2hvbCcpCmJveHBsb3Qod2luZXNfYWRqdXN0ZWQkdm9sYXRpbGVhY2lkaXR5LCBtYWluPSd2b2xhdGlsZWFjaWRpdHknKQpib3hwbG90KHdpbmVzX2FkanVzdGVkJHRvdGFsc3VsZnVyZGlveGlkZSwgbWFpbj0ndG90YWxzdWxmdXJkaW94aWRlJykKYm94cGxvdCh3aW5lc19hZGp1c3RlZCRxdWFsaXR5LCBtYWluPSdxdWFsaXR5JykKYm94cGxvdCh3aW5lc19hZGp1c3RlZCRjaXRyaWNhY2lkLCBtYWluPSdjaXRyaWNhY2lkJykKYm94cGxvdCh3aW5lc19hZGp1c3RlZCRkZW5zaXR5LCBtYWluPSdkZW5zaXR5JykKYm94cGxvdCh3aW5lc19hZGp1c3RlZCRwSCwgbWFpbj0ncEgnKQpwYXIgKG1mcm93PWMoMSwxKSkKYGBgCgojIyBQYWRyb25pemFuZG8gYXMgdmFyacOhdmVpcwoKYGBge3J9CndpbmVzX3BhZHIgPC0gcHJlUHJvY2Vzcyh3aW5lc19hZGp1c3RlZFssMToxMV0sIGMoImNlbnRlciIsICJzY2FsZSIpKSAlPiUgCiAgcHJlZGljdCguLCB3aW5lc19hZGp1c3RlZCkgJT4lIAogIGRhdGEuZnJhbWUodHJhbnMgPSAuKQoKY29sbmFtZXMod2luZXNfcGFkcikgPC0gY29sbmFtZXMod2luZXNfYWRqdXN0ZWQpCgpzdHIod2luZXNfcGFkcikKc3VtbWFyeSh3aW5lc19wYWRyKQpgYGAKCiMjIFJlbW/Dp8OjbyBkb3MgX291dGxpZXJzXwoKUGFyYSBhIHJlbW/Dp8OjbyBkb3Mgb3V0bGllcnMsIGZvaSB1dGlsaXphZG8gbyBtw6l0b2RvIFotU2NvcmUuIE9zIHZhbG9yZXMgY29tIG1haXMgZGUgMyBkZXN2aW9zIHBhZHLDtWVzIGZvcmFtIHJlbW92aWRvcy4KCmBgYHtyfQp3aW5lc19wYWRyIDwtIHdpbmVzX3BhZHJbIWFicyh3aW5lc19wYWRyJGNobG9yaWRlcykgPiAzLF0gJT4lCiAgICAgICAgICAgICAgLlshYWJzKC4kc3VscGhhdGVzKSA+IDMsXSAlPiUKICAgICAgICAgICAgICAuWyFhYnMoLiRmaXhlZGFjaWRpdHkpID4gMyxdICU+JQogICAgICAgICAgICAgIC5bIWFicyguJHJlc2lkdWFsc3VnYXIpID4gMyxdICU+JQogICAgICAgICAgICAgIC5bIWFicyguJGZyZWVzdWxmdXJkaW94aWRlKSA+IDMsXSAlPiUKICAgICAgICAgICAgICAuWyFhYnMoLiRhbGNvaG9sKSA+IDMsXSAlPiUKICAgICAgICAgICAgICAuWyFhYnMoLiR2b2xhdGlsZWFjaWRpdHkpID4gMyxdICU+JQogICAgICAgICAgICAgIC5bIWFicyguJHRvdGFsc3VsZnVyZGlveGlkZSkgPiAzLF0gJT4lCiAgICAgICAgICAgICAgLlshYWJzKC4kY2l0cmljYWNpZCkgPiAzLF0gJT4lCiAgICAgICAgICAgICAgLlshYWJzKC4kZGVuc2l0eSkgPiAzLF0gJT4lCiAgICAgICAgICAgICAgLlshYWJzKC4kcEgpID4gMyxdCgpzdHIod2luZXNfcGFkcikKc3VtbWFyeSh3aW5lc19wYWRyKQogIApgYGAKCiMgSW50ZXJhw6fDo28gZW50cmUgdmFyacOhdmVpcyAKCiMjIENvcnJlbGHDp8O1ZXMKCmBgYHtyfQp3aW5lc19wYWRyICU+JQogIHNlbGVjdCgtdHlwZSkgJT4lCiAgZ2djb3JyKG1ldGhvZCA9IGMoInBhaXJ3aXNlIiwic3BlYXJtYW4iKSwgbGFiZWwgPSBGQUxTRSwgYW5nbGUgPSAtMCwgaGp1c3QgPSAwLjIpICsKICBjb29yZF9mbGlwKCkKYGBgCgpgYGB7ciwgZmlnLmhlaWdodD00fQp3aW5lc19hZGp1c3RlZCAlPiUKICBzZWxlY3QoLWNpdHJpY2FjaWQsIC10b3RhbHN1bGZ1cmRpb3hpZGUpICU+JQogIGdncGFpcnMoKQpgYGAKCiMjIEdyw6FmaWNvcyBkZSBkaXNwZXJzw6NvCgpgYGB7ciwgZmlnLmhlaWdodD01fQpwMSA8LSB3aW5lc19wYWRyICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBkZW5zaXR5LCB5ID0gYWxjb2hvbCwgY29sb3IgPSB0eXBlKSkgKwogICBnZW9tX3BvaW50KGFscGhhID0gMC4yLCBzaXplID0gMikgKwogICBnZW9tX3Ntb290aChtZXRob2QgPSAnbG0nKQoKcDIgPC0gd2luZXNfcGFkciAlPiUgCiAgZ2dwbG90KGFlcyh4ID0gZGVuc2l0eSwgeSA9IGZpeGVkYWNpZGl0eSwgY29sb3IgPSB0eXBlKSkgKwogICBnZW9tX3BvaW50KGFscGhhID0gMC4yLCBzaXplID0gMikgKwogICBnZW9tX3Ntb290aChtZXRob2QgPSAnbG0nKQoKcDMgPC0gd2luZXNfcGFkciAlPiUgCiAgZ2dwbG90KGFlcyh4ID0gdm9sYXRpbGVhY2lkaXR5LCB5ID0gcXVhbGl0eSwgY29sb3IgPSB0eXBlKSkgKwogICBnZW9tX3BvaW50KGFscGhhID0gMC4yLCBzaXplID0gMikgKwogICBnZW9tX3Ntb290aChtZXRob2QgPSAnbG0nKQoKcDQgPC0gd2luZXNfcGFkciAlPiUgCiAgZ2dwbG90KGFlcyh4ID0gYWxjb2hvbCwgeSA9IHF1YWxpdHksIGNvbG9yID0gdHlwZSkpICsKICAgZ2VvbV9wb2ludChhbHBoYSA9IDAuMiwgc2l6ZSA9IDIpICsKICAgZ2VvbV9zbW9vdGgobWV0aG9kID0gJ2xtJykKCnA1IDwtIHdpbmVzX3BhZHIgJT4lIAogIGdncGxvdChhZXMoeCA9IHJlc2lkdWFsc3VnYXIsIHkgPSBhbGNvaG9sLCBjb2xvciA9IHR5cGUpKSArCiAgIGdlb21fcG9pbnQoYWxwaGEgPSAwLjIsIHNpemUgPSAyKSArCiAgIGdlb21fc21vb3RoKG1ldGhvZCA9ICdsbScpCgpwNiA8LSB3aW5lc19wYWRyICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBkZW5zaXR5LCB5ID0gY2hsb3JpZGVzLCBjb2xvciA9IHR5cGUpKSArCiAgIGdlb21fcG9pbnQoYWxwaGEgPSAwLjIsIHNpemUgPSAyKSArCiAgIGdlb21fc21vb3RoKG1ldGhvZCA9ICdsbScpCgptdWx0aXBsb3QocDEsIHAyLCBwMywgcDQsIHA1LCBwNiwgY29scyA9IDIgKQoKYGBgCgojIFNlcGFyYcOnw6NvIGRvcyBkYWRvcyBkZSB0cmVpbm8gZSB0ZXN0ZQoKVXRpbGl6YXJlaSA4MCUgZG9zIGRhZG9zIHBhcmEgdHJlaW5vLCBlIDIwJSBwYXJhIHRlc3RlLgoKYGBge3J9CnNldC5zZWVkKDMyMzEyKQoKdHJhaW5faW5kIDwtIGZsb29yKDAuOCAqIG5yb3cod2luZXNfcGFkcikpICU+JQogIHNhbXBsZShzZXFfbGVuKG5yb3cod2luZXNfcGFkcikpLCBzaXplID0gLikKCnRyYWluIDwtIHdpbmVzX3BhZHJbdHJhaW5faW5kLCBdCnRlc3QgPC0gd2luZXNfcGFkclstdHJhaW5faW5kLCBdCmBgYAoKCiMgRXRhcGEgMSAtIEVzdGltYXRpdmEgZGEgdmFyacOhdmVsICoqcXVhbGl0eSoqCgojIyBSZWdyZXNzw6NvIGxpbmVhcgoKVGVzdGFuZG8gYSByZWdyZXNzw6NvIGxpbmVhciBjb20gdG9kYXMgYXMgdmFyacOhdmVpcy4KCiMjIyBQcmltZWlyYSB0ZW50YXRpdmEgZGUgc2UgZ2VyYXIgdW0gbW9kZWxvCgpgYGB7cn0KbG1fMDEgPC0gbG0ocXVhbGl0eSB+IGZpeGVkYWNpZGl0eSArIHZvbGF0aWxlYWNpZGl0eQogICAgICAgICAgICAgICArIGNpdHJpY2FjaWQgKyByZXNpZHVhbHN1Z2FyICsgY2hsb3JpZGVzCiAgICAgICAgICAgICAgICsgZnJlZXN1bGZ1cmRpb3hpZGUgKyB0b3RhbHN1bGZ1cmRpb3hpZGUKICAgICAgICAgICAgICAgKyBkZW5zaXR5ICsgcEggKyBzdWxwaGF0ZXMgKyBhbGNvaG9sICsgdHlwZSwgZGF0YT10cmFpbikKc3VtbWFyeShsbV8wMSkKYGBgCgpQZWxvcyByZXN1bHRhZG9zIGFjaW1hLCDDqSBwb3Nzw612ZWwgY29uY2x1aXIgcXVlIG8gbW9kZWxvIGxpbmVhciDDqSBlc3RhdGlzdGljYW1lbnRlIHNpZ25pZmljYW50ZSwgZGV2aWRvIG8gcC12YWx1ZSBzZXIgaW5mZXJpb3IgYSAwLDA1LiBFbnRyZXRhbnRvIGjDoSB2YXJpw6F2ZWlzIHF1ZSBuw6NvIHPDo28gc2lnbmlmaWNhbnRlcywgZSBwb2RlbSBzZXIgcmVtb3ZpZGFzLgoKCiMjIyBTZWd1bmRhIHRlbnRhdGl2YSBkZSBzZSBnZXJhciB1bSBtb2RlbG8KCkNvbW8gdmltb3MgbmEgc2XDp8OjbyBhbnRlcmlvciwgdmFtb3MgcmVtb3ZlciAqKmNpdHJpY2FjaWQqKiwgKipjaGxvcmlkZXMqKiBlICoqdG90YWxzdWxmdXJkaW94aWRlKiogZSBjcmlhciB1bSBub3ZvIG1vZGVsby4gQ29tbyB2aW1vcyBkdXJhbnRlIGEgYW7DoWxpc2UgZXhwbG9yYXTDs3JpYSwgaMOhIHVtYSBjb3JyZWxhw6fDo28gbXVpdG8gZm9ydGUgZW50cmUgKip0b3RhbHN1bGZ1cmRpb3hpZGUqKiBlICoqZnJlZXN1bGZ1cmRpb3hpZGUqKi4gUG9yIGVzc2UgbW90aXZvLCB1dGlsaXphcmVtb3Mgc29tZW50ZSB1bWEgZGVsYXMuCgpgYGB7cn0KbG1fMDIgPC0gbG0ocXVhbGl0eSB+IGZpeGVkYWNpZGl0eSArIHZvbGF0aWxlYWNpZGl0eQogICAgICAgICAgICAgICArIHJlc2lkdWFsc3VnYXIgKyBmcmVlc3VsZnVyZGlveGlkZSArIGRlbnNpdHkgCiAgICAgICAgICAgICAgICsgcEggKyBzdWxwaGF0ZXMgKyBhbGNvaG9sICsgdHlwZSwgZGF0YT10cmFpbikKc3VtbWFyeShsbV8wMikKYGBgCgpBcGVzYXIgZGUgdG9kYXMgYXMgdmFyacOhdmVpcyBpbmRlcGVuZGVudGVzIHBvc3N1w61yZW0gdW0gYmFpeG8gcC12YWx1ZSwgbyBub3NzbyBSLVNxdWFyZWQgc2UgZW5jb250cmEgdW0gcG91Y28gYmFpeG8uIE7Do28gZGVzY2FydGFyZWkgbmVjZXNzYXJpYW1lbnRlIG8gbW9kZWxvIGRldmlkbyBhIHVtIHZhbG9yIHBlcXVlbm8gZG8gUiBxdWFkcmFkby4gTmVzc2UgY2FzbyDDqSBtZWxob3IgdmVyaWZpY2FyIGEgYWN1csOhY2lhIGRhIHByZWRpw6fDo28gbm9zIGRhZG9zIGRlIHRlc3RlLgoKCiMjIyBBbsOhbGlzZSBkbyBtb2RlbG8gZXNjb2xoaWRvCgojIyMjIE1TRQoKYGBge3J9CnF1YWxpdHlQcmVkaWN0IDwtIHByZWRpY3QobG1fMDIsIHRlc3QsIGludGVydmFsID0gInByZWRpY3Rpb24iLCBsZXZlbCA9IDAuOTUpCgptc2UgPC0gbWVhbigodGVzdCRxdWFsaXR5ICAtIHF1YWxpdHlQcmVkaWN0WywxXSleMikKc3FydChtc2UpCmBgYAoKCiMjIyMgRXJybyB1c2FuZG8gbcOpZGlhCgpgYGB7cn0KZXJyb191c2FuZG9fbWVkaWEgPC0gbWVhbigodGVzdCRxdWFsaXR5ICAtIG1lYW4odGVzdCRxdWFsaXR5KSleMikKc3FydChlcnJvX3VzYW5kb19tZWRpYSkKYGBgCgoKIyMjIyBDb3JyZWxhw6fDo28gZW50cmUgdmFsb3JlcyByZWFpcyBlIHByZWRpdG9zCgpgYGB7cn0KYWN0dWFsc19wcmVkcyA8LSBkYXRhLmZyYW1lKGNiaW5kKGFjdHVhbHM9dGVzdCRxdWFsaXR5LCBwcmVkaWN0ZWRzPXF1YWxpdHlQcmVkaWN0WywxXSkpCmNvcnJlbGF0aW9uX2FjY3VyYWN5IDwtIGNvcihhY3R1YWxzX3ByZWRzKQpjb3JyZWxhdGlvbl9hY2N1cmFjeQpgYGAKCgojIyMjIE1pbiBNYXggQWNjdXJhY3kKYGBge3J9Cm1pbl9tYXhfYWNjdXJhY3kgPC0gbWVhbihhcHBseShhY3R1YWxzX3ByZWRzLCAxLCBtaW4pIC8gYXBwbHkoYWN0dWFsc19wcmVkcywgMSwgbWF4KSkKbWluX21heF9hY2N1cmFjeQpgYGAKCiMjIyMgTUFQRQpgYGB7cn0KbWFwZSA8LSBtZWFuKGFicygoYWN0dWFsc19wcmVkcyRwcmVkaWN0ZWRzIC0gYWN0dWFsc19wcmVkcyRhY3R1YWxzKSkvYWN0dWFsc19wcmVkcyRhY3R1YWxzKQptYXBlCmBgYAoKIyMjIyBHcsOhZmljbyBSZXPDrWR1bwoKw4kgcG9zc8OtdmVsIG5vdGFyIHF1ZSBvcyByZXPDrWR1b3MgZXN0w6NvIGRpc3BlcnNvcyBhbGVhdG9yaWFtZW50ZSBlbSB0b3JubyBkZSB6ZXJvLCBjb20gdmFyacOibmNpYSBjb25zdGFudGUuCgpgYGB7cn0KbG1fMDIgJT4lCiAgcmVzaWQoKSAlPiUKICBwbG90KHByZWRpY3QobG1fMDIpLCAuLCB4bGFiID0gIlByZWRpdG9yIGxpbmVhciIsIHlsYWIgPSAiUmVzaWR1b3MiKQphYmxpbmUoaCA9IDAsIGx0eSA9IDIpCgpsbV8wMiAlPiUKICByZXNpZCgpICU+JQogIGhpc3QobWFpbj0nSGlzdG9ncmFtYSBkb3MgcmVzw61kdW9zJykKYGBgCgojIyMjIERpc3RyaWJ1acOnw6NvIG5vcm1hbCBkb3MgZXJyb3MKCmBgYHtyfQpxcW5vcm0ocmVzaWR1YWxzKGxtXzAyKSwgeWxhYj0iUmVzw61kdW9zIix4bGFiPSJRdWFudGlzIHRlw7NyaWNvcyIsbWFpbj0iIikKcXFsaW5lKHJlc2lkdWFscyhsbV8wMikpCmBgYAoKIyMjIyBUZXN0ZSBkZSBTaGFwaXJvCgpgYGB7cn0Kc2hhcGlyby50ZXN0KHNhbXBsZShyZXNpZHVhbHMobG1fMDIpLCBzaXplID0gNDgwMCkpIApgYGAKCiMjIyMgSy1mb2xkIGNyb3NzIHZhbGlkYXRpb24KCmBgYHtyfQpzdXBwcmVzc1dhcm5pbmdzKENWbG0oZGF0YT13aW5lc19wYWRyLAogICAgZm9ybS5sbSA9IHF1YWxpdHkgfiBmaXhlZGFjaWRpdHkgKyB2b2xhdGlsZWFjaWRpdHkKICAgICAgICAgICAgICAgKyByZXNpZHVhbHN1Z2FyICsgZnJlZXN1bGZ1cmRpb3hpZGUgKyBkZW5zaXR5IAogICAgICAgICAgICAgICArIHBIICsgc3VscGhhdGVzICsgYWxjb2hvbCArIHR5cGUsIG09NSwgZG90cz1UUlVFLCBzZWVkPTI5LCBsZWdlbmQucG9zPSJ0b3BsZWZ0IiwgIHByaW50aXQ9RkFMU0UpKTsKYGBgCgojIyDDgXJ2b3JlIGRlIFJlZ3Jlc3PDo28KClBhcmEgY29tcGFyYXIgY29tIGEgcmVncmVzc8OjbyBsaW5lYXIsIGZvaSBmZWl0byBvIG1vZGVsbyBkZSDDoXJ2b3JlIGRlIHJlZ3Jlc3PDo28uCgojIyMgTW9kZWxvCgpgYGB7ciwgZmlnLmhlaWdodD03LCBmaWcud2lkdGggPSAxMX0KcmVncmVzc2lvbl90cmVlIDwtIHJwYXJ0IChxdWFsaXR5IH4gZml4ZWRhY2lkaXR5ICsgdm9sYXRpbGVhY2lkaXR5CiAgICAgICAgICAgICAgICsgY2l0cmljYWNpZCArIHJlc2lkdWFsc3VnYXIgKyBjaGxvcmlkZXMKICAgICAgICAgICAgICAgKyBmcmVlc3VsZnVyZGlveGlkZSArIHRvdGFsc3VsZnVyZGlveGlkZQogICAgICAgICAgICAgICArIGRlbnNpdHkgKyBwSCArIHN1bHBoYXRlcyArIGFsY29ob2wgKyB0eXBlLCBkYXRhPXRyYWluLCAKICAgICAgICAgICAgICAgY3AgPSAwLjAwMSwgbWluc3BsaXQgPSAxNSwgbWF4ZGVwdGg9MTUpCgpycGFydC5wbG90KHJlZ3Jlc3Npb25fdHJlZSwgdHlwZT00LCBleHRyYT0xLCB1bmRlcj1GQUxTRSwgY2xpcC5yaWdodC5sYWJzPVRSVUUsCiAgICAgICAgICAgZmFsbGVuLmxlYXZlcz1GQUxTRSwgICBkaWdpdHM9MiwgdmFybGVuPS0xMCwgZmFjbGVuPTIwLAogICAgICAgICAgIGNleD0wLjQsIHR3ZWFrPTEuNywKICAgICAgICAgICBjb21wcmVzcz1UUlVFLGJveC5wYWxldHRlPSJHcmF5cyIsCiAgICAgICAgICAgc25pcD1GQUxTRSkKYGBgCgojIyMgTVNFCgpgYGB7cn0KdmFsX3ByZWRfcmVncmVzc2lvbl90cmVlIDwtIHByZWRpY3QocmVncmVzc2lvbl90cmVlLCB0ZXN0LCBpbnRlcnZhbCA9ICJwcmVkaWN0aW9uIiwgbGV2ZWwgPSAwLjk1KSAKCm1zZV90cmVlIDwtIG1lYW4oKHRlc3QkcXVhbGl0eSAgLSB2YWxfcHJlZF9yZWdyZXNzaW9uX3RyZWUpXjIpCnNxcnQobXNlX3RyZWUpCmBgYAoKIyMjIENvcnJlbGHDp8OjbyBlbnRyZSB2YWxvcmVzIHJlYWlzIGUgcHJlZGl0b3MKCmBgYHtyfQphY3R1YWxzX3ByZWRzIDwtIGRhdGEuZnJhbWUoY2JpbmQoYWN0dWFscz10ZXN0JHF1YWxpdHksIHByZWRpY3RlZHM9dmFsX3ByZWRfcmVncmVzc2lvbl90cmVlKSkKY29ycmVsYXRpb25fYWNjdXJhY3kgPC0gY29yKGFjdHVhbHNfcHJlZHMpCmNvcnJlbGF0aW9uX2FjY3VyYWN5CmBgYAojIyMgR3LDoWZpY28gUmVzw61kdW8KCmBgYHtyfQpycyA8LSB2YWxfcHJlZF9yZWdyZXNzaW9uX3RyZWUgLSB0ZXN0JHF1YWxpdHkgCnBsb3QocHJlZGljdChyZWdyZXNzaW9uX3RyZWUsIHRlc3QpLCBycywgeGxhYiA9ICJDb20gw4Fydm9yZSBkZSBSZWdyZXNzw6NvIiwgeWxhYiA9ICJSZXNpZHVvcyIpCmFibGluZShoID0gMCwgbHR5ID0gMikKCmhpc3QocnMpCmBgYAoKIyMjIERpc3RyaWJ1acOnw6NvIGRvcyBlcnJvcyBhbGVhdMOzcmlvcwpgYGB7cn0KcXFub3JtKHJzLCB5bGFiPSJSZXPDrWR1b3MiLHhsYWI9IlF1YW50aXMgdGXDs3JpY29zIixtYWluPSIiKQpxcWxpbmUocnMpCmBgYAoKIyMgQ29uY2x1c8OjbwoKRW1ib3JhIGNvbSByZXN1bHRhZG9zIG11aXRvIHBhcmVjaWRvcywgYSB0w6ljbmljYSBkZSDDoXJ2b3JlIGRlIHJlZ3Jlc3PDo28gdGV2ZSB1bWEgYWN1csOhY2lhIHVtIHBvdWNvIG1haW9yLiDDiSBwb3Nzw612ZWwgcXVlIG8gZGVjZXBjaW9uYW50ZSByZXN1bHRhZG8gZGFzIGR1YXMgdMOpY25pY2FzIHNlIGRldmUgYW8gdHJhdGFtZW50byBkb3MgdmluaG9zIHRpbnRvcyBqdW50YW1lbnRlIGNvbSBvcyB2aW5ob3MgYnJhbmNvcy4gQ29tbyBkZXUgcGFyYSBwZXJjZWJlciBwZWxhIGFuw6FsaXNlIGV4cGxvcmF0w7NyaWEsIG9zIGRvaXMgdGlwb3MgZGUgdmluaG9zIHBvc3N1ZW0gYWxndW1hcyBjYXJhY3RlcsOtc3RpY2FzIGbDrXNpY28tcXXDrW1pY2FzIGNvbSBjb21wb3J0YW1lbnRvcyBkaWZlcmVudGVzLgoKIyBFdGFwYSAyIC0gQ2F0ZWdvcml6YW5kbyB2aW5ob3MgImJvbnMiIGUgInJ1aW5zIgoKIyMgQ3JpYcOnw6NvIGRhIHZhcmnDoXZlbCByZXNwb3N0YSBjYXRlZ8OzcmljYQoKYGBge3J9CndpbmVzX3BhZHIkZ29vZCA8LSBhcy5mYWN0b3IoaWZlbHNlKHdpbmVzX3BhZHIkcXVhbGl0eSA+PSA2LDEsMCkpCnN1bW1hcnkod2luZXNfcGFkciRnb29kKQoKdHJhaW4gPC0gd2luZXNfcGFkclt0cmFpbl9pbmQsIF0KdGVzdCA8LSB3aW5lc19wYWRyWy10cmFpbl9pbmQsIF0KYGBgCgojIyDDgXJ2b3JlIGRlIERlY2lzw6NvCgojIyMgQ29uc3RydcOnw6NvIGRvIG1vZGVsbwoKYGBge3J9CmRlY2lzaW9uX3RyZWUgPC0gcnBhcnQgKHRyYWluJGdvb2QgfiBmaXhlZGFjaWRpdHkgKyB2b2xhdGlsZWFjaWRpdHkKICAgICAgICAgICAgICAgICAgICAgICArIGNpdHJpY2FjaWQgKyByZXNpZHVhbHN1Z2FyICsgY2hsb3JpZGVzCiAgICAgICAgICAgICAgICAgICAgICAgKyBmcmVlc3VsZnVyZGlveGlkZSArIHRvdGFsc3VsZnVyZGlveGlkZQogICAgICAgICAgICAgICAgICAgICAgICsgZGVuc2l0eSArIHBIICsgc3VscGhhdGVzICsgYWxjb2hvbCArIHR5cGUsIAogICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSB0cmFpbikKCmZhbmN5UnBhcnRQbG90KGRlY2lzaW9uX3RyZWUpCgpgYGAKCiMjIyBBdmFsaWFuZG8gbyBNb2RlbG8KCiMjIyMgTWF0cml6IGRlIENvbmZ1c8OjbwoKCgpgYGB7cn0KZGVjaXNpb25fdHJlZV9wcmVkaWN0IDwtIHByZWRpY3QoZGVjaXNpb25fdHJlZSwgdGVzdCwgdHlwZT0nY2xhc3MnKQoKY29uZnVzaW9uX21hdHJpeCA8LSB0YWJsZSh0ZXN0JGdvb2QsIGRlY2lzaW9uX3RyZWVfcHJlZGljdCkKY29uZnVzaW9uX21hdHJpeApgYGAKCiMjIyMgQWN1csOhY2lhCgpgYGB7cn0KZGlhZ29uYWwgPC0gZGlhZyhjb25mdXNpb25fbWF0cml4KQpBY2MgPC0gIHN1bShkaWFnb25hbCkvc3VtKGNvbmZ1c2lvbl9tYXRyaXgpCkFjYwpgYGAKCiMjIFJlZ3Jlc3PDo28gTG9nw61zdGljYQoKIyMjIFByaW1laXJvIE1vZGVsbwoKYGBge3J9CmxvZ18wMSA8LSBnbG0odHJhaW4kZ29vZCB+IGZpeGVkYWNpZGl0eSArIHZvbGF0aWxlYWNpZGl0eQogICAgICAgICAgICAgICArIGNpdHJpY2FjaWQgKyByZXNpZHVhbHN1Z2FyICsgY2hsb3JpZGVzCiAgICAgICAgICAgICAgICsgZnJlZXN1bGZ1cmRpb3hpZGUgKyB0b3RhbHN1bGZ1cmRpb3hpZGUKICAgICAgICAgICAgICAgKyBkZW5zaXR5ICsgcEggKyBzdWxwaGF0ZXMgKyBhbGNvaG9sICsgdHlwZSwgdHJhaW4sIGZhbWlseT1iaW5vbWlhbChsaW5rPWxvZ2l0KSkKCnN1bW1hcnkobG9nXzAxKQpgYGAKCiMjIFNlZ3VuZG8gTW9kZWxvCgpPIHNlZ3VuZG8gbW9kZWxvIGZvaSBmZWl0byByZW1vdmVuZG8gYXMgdmFyacOhdmVpcyBxdWUgcG9zc3XDrWFtIHVtIHAtdmFsdWUgbXVpdG8gZWxldmFkbyBubyBwcmltZWlybyBtb2RlbG8uCgpgYGB7cn0KbG9nXzAyIDwtIGdsbSh0cmFpbiRnb29kIH4gZml4ZWRhY2lkaXR5ICsgdm9sYXRpbGVhY2lkaXR5CiAgICAgICAgICAgICAgICsgY2l0cmljYWNpZCArIHJlc2lkdWFsc3VnYXIKICAgICAgICAgICAgICAgKyBmcmVlc3VsZnVyZGlveGlkZSArIHRvdGFsc3VsZnVyZGlveGlkZQogICAgICAgICAgICAgICArIGRlbnNpdHkgKyBwSCArIHN1bHBoYXRlcyArIGFsY29ob2wgKyB0eXBlLCB0cmFpbiwgZmFtaWx5PWJpbm9taWFsKGxpbms9bG9naXQpKQoKc3VtbWFyeShsb2dfMDIpCmBgYAoKIyMjIE1hdHJpeiBkZSBjb25mdXPDo28KCmBgYHtyfQpsb2dpc3RpY19yZWdyZXNzaW9uX3ByZWRpY3QgPC0gcHJlZGljdChsb2dfMDIsIHRlc3QsIHR5cGU9InJlc3BvbnNlIikgJT4lCiAgY3V0KC4sIGJyZWFrcz1jKDAsMC41MCwxKSwgcmlnaHQ9RikKCk1DIDwtIHRhYmxlKHRlc3QkZ29vZCwgIGxvZ2lzdGljX3JlZ3Jlc3Npb25fcHJlZGljdCAsIGRlcGFyc2UubGV2ZWwgPSAyKSAjIG1vbnRhciBhIG1hdHJpeiBkZSBjb25mdXPDo28gIApzaG93KE1DKQpgYGAKIyMjIEFjdXLDoWNpYQoKYGBge3J9CkFDQyA9IHN1bShkaWFnKE1DKSkvc3VtKE1DKSAKc2hvdyhBQ0MpCmBgYAoKIyMjIEdyw6FmaWNvIFJPQwoKYGBge3J9CnAgPC0gcHJlZGljdChsb2dfMDIsIHRlc3QsIHR5cGU9InJlc3BvbnNlIikKcHIgPC0gcHJlZGljdGlvbihwLCB0ZXN0JGdvb2QpCnByZiA8LSBwZXJmb3JtYW5jZShwciwgbWVhc3VyZSA9ICJ0cHIiLCB4Lm1lYXN1cmUgPSAiZnByIikKcGxvdChwcmYpCgphdWMgPC0gcGVyZm9ybWFuY2UocHIsIG1lYXN1cmUgPSAiYXVjIikKYXVjIDwtIGF1Y0B5LnZhbHVlc1tbMV1dCmF1YwpgYGAKCiMjIENvbmNsdXPDo28KCkFzIHTDqWNuaWNhcyBkZSDDoXJ2b3JlIGRlIGRlY2lzw6NvIGUgcmVncmVzc8OjbyBsb2fDrXN0aWNhIGFwcmVzZW50YXJhbSByZXN1bHRhZG9zIG11aXRvIHNpbWlsYXJlcy4gRW0gY29udHJhc3RlIGNvbSBhIHByaW1laXJhIGV0YXBhLCBvcyBtb2RlbG9zIHBhcmEgcHJldmlzw6NvIGRlIHVtYSB2YXJpw6F2ZWwgY2F0ZWfDs3JpY2EgZGUgcXVhbGlkYWRlIHRpdmVyYW0gdW1hIGFjdXLDoWNpYSBtdWl0byBtYWlvci4KCiMgRXRhcGEgMzogRGVmaW5pciBncnVwb3MgZGUgdmluaG9zCgpgYGB7cn0Kd2luZXNfcGFkciA8LSB3aW5lc19wYWRyICU+JSAKICBzZWxlY3QoLXF1YWxpdHksIC1nb29kLCAtdHlwZSkKYGBgCgojIyBEZXRlcm1pbmHDp8OjbyBkZSBxdWFudGlkYWRlIGNsdXN0ZXJzCgpgYGB7cn0Kd3NzIDwtIChucm93KHdpbmVzX3BhZHIpLTEpKnN1bShhcHBseSh3aW5lc19wYWRyLDIsdmFyKSkKZm9yIChpIGluIDI6MjApIHdzc1tpXSA8LSBzdW0oa21lYW5zKHdpbmVzX3BhZHIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjZW50ZXJzPWkpJHdpdGhpbnNzKQpwbG90KDE6MjAsIHdzcywgdHlwZT0iYiIsIHhsYWI9Ik7Dum1lcm8gZGUgY2x1c3RlcnMiLAogICAgIHlsYWI9IlNvbWEgZGUgcXVhZHJhZG9zIGludHJhLWNsdXN0ZXJzIikgCmBgYAoKQW5hbGlzYW5kbyBvIGdyw6FmaWNvIGFjaW1hLCBvcHRlaSBwb3IgZGl2aWRpciBvcyB2aW5ob3MgZW0gNiBncnVwb3MgZGlzdGludG9zLgoKIyMgQ2x1c3RlciBIaWVyw6FycXVpY28KCklkZW50aWZpY2FuZG8gb3MgZ3J1cG9zIG5vIERlbmRvZ3JhbWEuCgpgYGB7cn0KaGllcl9jbHVzdGVyIDwtIGhjbHVzdChkaXN0KHdpbmVzX3BhZHIpLG1ldGhvZD0nd2FyZC5EMicpCmQgPC0gZGlzdCh3aW5lc19wYWRyLCBtZXRob2QgPSAiZXVjbGlkZWFuIikKcGxvdChoaWVyX2NsdXN0ZXIsIHlsYWI9J2Rpc3RhbmNpYScsIGNleD0wLjYpCgpncm91cHMgPC0gY3V0cmVlKGhpZXJfY2x1c3Rlciwgaz02KSAjSWRlbnRpZmljYW5kbyBvcyA2IGRpZmVyZW50ZXMgZ3J1cG9zCnJlY3QuaGNsdXN0KGhpZXJfY2x1c3Rlciwgaz02LCBib3JkZXI9InJlZCIpIApgYGAKCgoKIyMgSy1NZWFucwoKYGBge3J9CnNldC5zZWVkKDEyMzIpCm91dHB1dF9jbHVzdGVyIDwtIGttZWFucyh3aW5lc19wYWRyLCA2LCBpdGVyPTEwMCkKb3V0cHV0X2NsdXN0ZXIKYGBgCgpgYGB7cn0KdGFibGUob3V0cHV0X2NsdXN0ZXIkY2x1c3RlcikKYGBgCgpWaXN1YWxpemFuZG8gbyByZXN1bHRhZG8gZGUgdW1hIGZvcm1hIGdyw6FmaWNhCgpgYGB7ciwgZmlnLmhlaWdodCA9IDR9CnBsb3QodGttZWFucyh3aW5lc19wYWRyICwgayA9IDYsIGFscGhhID0gMC4wMSkpCmBgYAoKIyMgUENBCgpWYW1vcyB0ZW50YXIgcmVkdXppciBvIG7Dum1lcm8gZGUgdmFyacOhdmVpcyBlIHZlcmlmaWNhciBvIGltcGFjdG8gcXVlIGlzc28gdGVyw6EgbmEgY2x1c3Rlcml6YcOnw6NvLgoKYGBge3J9CmFjcGNvcl93aW5lcyA8LSBwcmNvbXAod2luZXNfcGFkciwgc2NhbGUgPSBUUlVFLCByZXR4ID0gVFJVRSkgCnN1bW1hcnkoYWNwY29yX3dpbmVzKQpgYGAKCmBgYHtyfQpwbG90KDE6bmNvbCh3aW5lc19wYWRyKSwgYWNwY29yX3dpbmVzJHNkZXZeMiwgdHlwZSA9ICJiIiwgeGxhYiA9ICJDb21wb25lbnRlIiwKICAgICB5bGFiID0gIlZhcmnDom5jaWEiLCBwY2ggPSAyMCwgY2V4LmF4aXMgPSAwLjgsIGNleC5sYWIgPSAwLjgpCmBgYAoKUGVsYXMgaW5mb3JtYcOnw7VlcyBhY2ltYSwgw6kgcG9zc8OtdmVsIHZlcmlmaWNhciBxdWUgYW8gb3B0YXIgcGVsb3MgNiBwcmltZWlyb3MgY29tcG9uZW50ZXMsIG9idMOpbS1zZSA4NSUgZGEgdmFyacOibmNpYSBkb3MgZGFkb3MuCgpgYGB7cn0KZXNjb3JlMSA8LSBhY3Bjb3Jfd2luZXMkeFssIDFdCmVzY29yZTIgPC0gYWNwY29yX3dpbmVzJHhbLCAyXQplc2NvcmUzIDwtIGFjcGNvcl93aW5lcyR4WywgM10KZXNjb3JlNCA8LSBhY3Bjb3Jfd2luZXMkeFssIDRdCmVzY29yZTUgPC0gYWNwY29yX3dpbmVzJHhbLCA1XQplc2NvcmU2IDwtIGFjcGNvcl93aW5lcyR4WywgNl0KCndpbmVzX2NwYSA8LWNiaW5kKGVzY29yZTEsIGVzY29yZTIsIGVzY29yZTMsIGVzY29yZTQsIGVzY29yZTUsIGVzY29yZTYpCgp3c3MgPC0gKG5yb3cod2luZXNfY3BhKS0xKSpzdW0oYXBwbHkod2luZXNfY3BhLDIsdmFyKSkKZm9yIChpIGluIDI6MjApIHdzc1tpXSA8LSBzdW0oa21lYW5zKHdpbmVzX2NwYSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNlbnRlcnM9aSkkd2l0aGluc3MpCnBsb3QoMToyMCwgd3NzLCB0eXBlPSJiIiwgeGxhYj0iTnVtYmVyIG9mIENsdXN0ZXJzIiwKICAgICB5bGFiPSJXaXRoaW4gZ3JvdXBzIHN1bSBvZiBzcXVhcmVzIikgCmBgYAoKIyMjIEstTWVhbnMgY29tIFBDQQpgYGB7cn0Kc2V0LnNlZWQoNTQyNSkKb3V0cHV0X2NsdXN0ZXJfcGNhIDwtIGttZWFucyh3aW5lc19jcGEsIDYsaXRlcj0xMDApCm91dHB1dF9jbHVzdGVyCmBgYApgYGB7cn0KdGFibGUob3V0cHV0X2NsdXN0ZXJfcGNhJGNsdXN0ZXIpCmBgYAoKYGBge3IsIGZpZy5oZWlnaHQgPSA0fQpwbG90KHRrbWVhbnMod2luZXNfY3BhICwgayA9IDYsIGFscGhhID0gMC4wMSkpCmBgYAoKIyMjIENydXphbmRvIG9zIGNsdXN0ZXJzCgpBZ29yYSB2b3UgY3J1emFyIG9zIHJlc3VsdGFkb3Mgb2J0aWRvcyBhIHBhcnRpciBkYXMgdmFyacOhdmVpcyBjb20gb3MgcmVzdWx0YWRvcyBvYml0b3MgcGVsb3MgY29tcG9uZW50ZXMgcHJpbmNpcGFpcy4KCmBgYHtyfQpjcnV6YW1lbnRvIDwtIGRhdGEuZnJhbWUoY2JpbmQob3V0cHV0X2NsdXN0ZXIkY2x1c3Rlciwgb3V0cHV0X2NsdXN0ZXJfcGNhJGNsdXN0ZXIpKQpjb2xuYW1lcyhjcnV6YW1lbnRvKSA8LSBjKCJzZW1fcGNhIiwgImNvbV9wY2EiKQpjcnV6YW1lbnRvICU+JQogIG11dGF0ZShzZW1fcGNhID0gYXMuZmFjdG9yKHNlbV9wY2EpLCBjb21fcGNhID0gYXMuZmFjdG9yKGNvbV9wY2EpKSAlPiUKICBnZ3Bsb3QoYWVzKHNlbV9wY2EsIGNvbV9wY2EpKSArCiAgZ2VvbV9jb3VudChjb2xvdXIgPSAiYmx1ZSIpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAiYm90dG9tIikKCmBgYAoKTyBjb3VudCBwbG90IGFjaW1hIGluZGljYSBxdWUsIGFwZXNhciBkZSBoYXZlciBtdWRhbsOnYSBuYSBmb3JtYSBjb21vIG9zIHZpbmhvcyBmb3JhbSBjbGFzc2lmaWNhZG9zLCBlbSBzdWEgbWFpb3JpYSBvcyBhZ3J1cGFtZW50b3MgY29udGludWFyYW0gcGFyZWNpZG9zLgo=